From 7b2743be7558ee875a319371bcceb814b166b020 Mon Sep 17 00:00:00 2001 From: Epithumia Date: Wed, 18 Dec 2013 14:32:13 +0100 Subject: [PATCH 1/3] =?UTF-8?q?Dynamically=20mangle=20long=20table=20names?= =?UTF-8?q?=20for=20Oracle;=20Unicode=20=E2=86=92=20UnicodeText.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- doc/usage.rst | 12 ++++++------ pokedex/db/__init__.py | 7 +++++++ pokedex/db/load.py | 25 ++++++++++++++++++++++++- pokedex/db/oracle.py | 41 +++++++++++++++++++++++++++++++++++++++++ pokedex/db/tables.py | 8 ++++---- pokedex/doc/tabledoc.py | 2 ++ 6 files changed, 84 insertions(+), 11 deletions(-) create mode 100644 pokedex/db/oracle.py diff --git a/doc/usage.rst b/doc/usage.rst index 2d3c08b..50b39fc 100644 --- a/doc/usage.rst +++ b/doc/usage.rst @@ -108,9 +108,9 @@ For example, you can get a list of all pokémon species, sorted by their Charmander Charmeleon ... - Keldeo - Meloetta - Genesect + Xerneas + Yveltal + Zygarde Or to order by :attr:`~pokedex.db.tables.PokemonSpecies.name`: @@ -123,7 +123,7 @@ Or to order by :attr:`~pokedex.db.tables.PokemonSpecies.name`: Abomasnow ... - Zweilous + Zygarde Filtering @@ -169,9 +169,9 @@ example: Petal Dance (120) Power Whip (120) Seed Flare (120) - SolarBeam (120) + Solar Beam (120) Wood Hammer (120) - Leaf Storm (140) + Leaf Storm (130) Frenzy Plant (150) That concludes our brief tutorial. diff --git a/pokedex/db/__init__.py b/pokedex/db/__init__.py index 464629d..ddcf799 100644 --- a/pokedex/db/__init__.py +++ b/pokedex/db/__init__.py @@ -38,6 +38,13 @@ def connect(uri=None, session_args={}, engine_args={}, engine_prefix=''): table.kwargs['mysql_engine'] = 'InnoDB' table.kwargs['mysql_charset'] = 'utf8' + ### Do some fixery for Oracle + if uri.startswith('oracle:') or uri.startswith('oracle+cx_oracle:'): + # Oracle requires auto_setinputsizes=False (or at least a special + # set of exclusions from it, which I don't know) + if 'auto_setinputsizes' not in uri: + uri += '?auto_setinputsizes=FALSE' + ### Connect engine_args[engine_prefix + 'url'] = uri engine = engine_from_config(engine_args, prefix=engine_prefix) diff --git a/pokedex/db/load.py b/pokedex/db/load.py index d241519..ec5d6d9 100644 --- a/pokedex/db/load.py +++ b/pokedex/db/load.py @@ -11,6 +11,7 @@ import pokedex from pokedex.db import metadata, tables, translations from pokedex.defaults import get_default_csv_dir from pokedex.db.dependencies import find_dependent_tables +from pokedex.db.oracle import rewrite_long_table_names def _get_table_names(metadata, patterns): @@ -138,6 +139,14 @@ def load(session, tables=[], directory=None, drop_tables=False, verbose=False, s table_names = _get_table_names(metadata, tables) table_objs = [metadata.tables[name] for name in table_names] + # Oracle fixery, load needs short names + # flag for oracle stuff + oranames = (session.connection().dialect.name == 'oracle') + if oranames: + # Shorten table names, Oracle limits table and column names to 30 chars + # Make a dictionary to match old<->new names + oradict = rewrite_long_table_names() + if recursive: table_objs.extend(find_dependent_tables(table_objs)) @@ -181,9 +190,11 @@ def load(session, tables=[], directory=None, drop_tables=False, verbose=False, s insert_stmt = table_obj.insert() print_start(table_name) - try: csvpath = "%s/%s.csv" % (directory, table_name) + # In oracle mode, use the original names instead of current + if oranames: + csvpath = "%s/%s.csv" % (directory, oradict[table_name]) csvfile = open(csvpath, 'rb') except IOError: # File doesn't exist; don't load anything! @@ -250,6 +261,9 @@ def load(session, tables=[], directory=None, drop_tables=False, verbose=False, s for column_name, value in zip(column_names, csvs): column = table_obj.c[column_name] + # Oracle treats empty strings as NULL + if not column.nullable and value == '' and oranames: + value = ' ' if column.nullable and value == '': # Empty string in a nullable column really means NULL value = None @@ -369,6 +383,11 @@ def dump(session, tables=[], directory=None, verbose=False, langs=['en']): table_names = _get_table_names(metadata, tables) table_names.sort() + # Oracle fixery : read from short table names, dump long names + oranames = (session.connection().dialect.name == 'oracle') + if oranames: + # Make a dictionary to match old<->new names + oradict = rewrite_long_table_names() for table_name in table_names: print_start(table_name) @@ -376,6 +395,10 @@ def dump(session, tables=[], directory=None, verbose=False, langs=['en']): writer = csv.writer(open("%s/%s.csv" % (directory, table_name), 'wb'), lineterminator='\n') + # In oracle mode, use the original names instead of current + if oranames: + writer = csv.writer(open("%s/%s.csv" % (directory, oradict[table_name]), 'wb'), + lineterminator='\n') columns = [col.name for col in table.columns] # For name tables, dump rows for official languages, as well as diff --git a/pokedex/db/oracle.py b/pokedex/db/oracle.py new file mode 100644 index 0000000..81b2105 --- /dev/null +++ b/pokedex/db/oracle.py @@ -0,0 +1,41 @@ +from pokedex.db import metadata + +### Helper functions for oracle +def rewrite_long_table_names(): + """Modifies the table names to disenvowel long table names. + Takes the metadata from memory or uses the imported one. + + Returns a dictionary matching short names -> long names. + """ + + # Load table names from metadata + t_names = metadata.tables.keys() + + table_names = list(t_names) + table_objs = [metadata.tables[name] for name in table_names] + + # Prepare a dictionary to match old<->new names + dictionary = {} + + # Shorten table names, Oracle limits table and column names to 30 chars + for table in table_objs: + table._orginal_name = table.name[:] + dictionary[table.name]=table._orginal_name + if len(table._orginal_name) > 30: + for letter in ['a', 'e', 'i', 'o', 'u', 'y']: + table.name=table.name.replace(letter,'') + dictionary[table.name]=table._orginal_name + return dictionary + + +def restore_long_table_names(metadata,dictionary): + """Modifies the table names to restore the long-naming. + + `metadata` + The metadata to restore. + + `dictionary` + The dictionary matching short name -> long name. + """ + for table in metadata.tables.values(): + table.name = dictionary[table.name] diff --git a/pokedex/db/tables.py b/pokedex/db/tables.py index b24ea86..5f42079 100644 --- a/pokedex/db/tables.py +++ b/pokedex/db/tables.py @@ -38,7 +38,7 @@ from sqlalchemy.orm.session import Session from sqlalchemy.orm.interfaces import AttributeExtension from sqlalchemy.sql import and_, or_ from sqlalchemy.schema import ColumnDefault -from sqlalchemy.types import Boolean, Enum, Integer, SmallInteger, Unicode +from sqlalchemy.types import Boolean, Enum, Integer, SmallInteger, Unicode, UnicodeText from pokedex.db import markdown, multilang @@ -130,7 +130,7 @@ create_translation_table('ability_names', Ability, 'names', info=dict(description="The name", format='plaintext', official=True, ripped=True)), ) create_translation_table('ability_prose', Ability, 'prose', - effect = Column(Unicode(4000), nullable=True, + effect = Column(UnicodeText(), nullable=True, info=dict(description="A detailed description of this ability's effect", format='markdown', string_getter=markdown.MarkdownString)), short_effect = Column(Unicode(512), nullable=True, info=dict(description="A short summary of this ability's effect", format='markdown', string_getter=markdown.MarkdownString)), @@ -920,7 +920,7 @@ create_translation_table('item_names', Item, 'names', create_translation_table('item_prose', Item, 'prose', short_effect = Column(Unicode(256), nullable=True, info=dict(description="A short summary of the effect", format='markdown', string_getter=markdown.MarkdownString)), - effect = Column(Unicode(4000), nullable=True, + effect = Column(UnicodeText(), nullable=True, info=dict(description=u"Detailed description of the item's effect.", format='markdown', string_getter=markdown.MarkdownString)), ) create_translation_table('item_flavor_summaries', Item, 'flavor_summaries', @@ -1219,7 +1219,7 @@ class MoveEffect(TableBase): create_translation_table('move_effect_prose', MoveEffect, 'prose', short_effect = Column(Unicode(256), nullable=True, info=dict(description="A short summary of the effect", format='markdown')), - effect = Column(Unicode(4000), nullable=True, + effect = Column(UnicodeText(), nullable=True, info=dict(description="A detailed description of the effect", format='markdown')), ) diff --git a/pokedex/doc/tabledoc.py b/pokedex/doc/tabledoc.py index 172a454..755303b 100644 --- a/pokedex/doc/tabledoc.py +++ b/pokedex/doc/tabledoc.py @@ -56,6 +56,8 @@ def column_type_str(column): return 'bool' if type(type_) == types.Unicode: return u'unicode – %s' % column.info['format'] + if type(type_) == types.UnicodeText: + return u'unicode – %s' % column.info['format'] if type(type_) == types.Enum: return 'enum: [%s]' % ', '.join(type_.enums) if type(type_) == markdown.MarkdownColumn: From 5b759feaa2cb193243c37c33df912089c3c73333 Mon Sep 17 00:00:00 2001 From: "Lynn \"Zhorken\" Vaughan" Date: Fri, 21 Feb 2014 16:21:36 -0500 Subject: [PATCH 2/3] Sort out all the non-nullable columns with empty values. --- pokedex/data/csv/item_categories.csv | 4 ++-- pokedex/data/csv/item_category_prose.csv | 4 ++-- pokedex/db/load.py | 3 --- pokedex/db/tables.py | 2 +- 4 files changed, 5 insertions(+), 8 deletions(-) diff --git a/pokedex/data/csv/item_categories.csv b/pokedex/data/csv/item_categories.csv index 75e9d24..d2e2082 100644 --- a/pokedex/data/csv/item_categories.csv +++ b/pokedex/data/csv/item_categories.csv @@ -23,7 +23,7 @@ id,pocket_id,identifier 22,8,plot-advancement 23,8,unused 24,1,loot -25,6, +25,6,all-mail 26,2,vitamins 27,2,healing 28,2,pp-recovery @@ -34,7 +34,7 @@ id,pocket_id,identifier 34,3,standard-balls 35,1,dex-completion 36,1,scarves -37,4, +37,4,all-machines 38,7,flutes 39,3,apricorn-balls 40,8,apricorn-box diff --git a/pokedex/data/csv/item_category_prose.csv b/pokedex/data/csv/item_category_prose.csv index ead8c30..6ac4725 100644 --- a/pokedex/data/csv/item_category_prose.csv +++ b/pokedex/data/csv/item_category_prose.csv @@ -23,7 +23,7 @@ item_category_id,local_language_id,name 22,9,Plot advancement 23,9,Unused 24,9,Loot -25,9, +25,9,All mail 26,9,Vitamins 27,9,Healing 28,9,PP recovery @@ -34,7 +34,7 @@ item_category_id,local_language_id,name 34,9,Standard balls 35,9,Dex completion 36,9,Scarves -37,9, +37,9,All machines 38,9,Flutes 39,9,Apricorn balls 40,9,Apricorn Box diff --git a/pokedex/db/load.py b/pokedex/db/load.py index ec5d6d9..ee09721 100644 --- a/pokedex/db/load.py +++ b/pokedex/db/load.py @@ -261,9 +261,6 @@ def load(session, tables=[], directory=None, drop_tables=False, verbose=False, s for column_name, value in zip(column_names, csvs): column = table_obj.c[column_name] - # Oracle treats empty strings as NULL - if not column.nullable and value == '' and oranames: - value = ' ' if column.nullable and value == '': # Empty string in a nullable column really means NULL value = None diff --git a/pokedex/db/tables.py b/pokedex/db/tables.py index 5f42079..9c85a36 100644 --- a/pokedex/db/tables.py +++ b/pokedex/db/tables.py @@ -1062,7 +1062,7 @@ class LocationArea(TableBase): create_translation_table('location_area_prose', LocationArea, 'prose', relation_lazy='joined', - name = Column(Unicode(64), nullable=False, index=True, + name = Column(Unicode(64), nullable=True, index=True, info=dict(description="The name", format='plaintext', official=False)), ) From f54a4caacaf0c9e31d621d58f0a05351c134d6c1 Mon Sep 17 00:00:00 2001 From: "Lynn \"Zhorken\" Vaughan" Date: Fri, 21 Feb 2014 16:45:47 -0500 Subject: [PATCH 3/3] General code tidying for this Oracle fix. --- pokedex/db/__init__.py | 6 ++---- pokedex/db/load.py | 46 +++++++++++++++++++++--------------------- pokedex/db/oracle.py | 35 +++++++++----------------------- 3 files changed, 35 insertions(+), 52 deletions(-) diff --git a/pokedex/db/__init__.py b/pokedex/db/__init__.py index ddcf799..3e8ecb1 100644 --- a/pokedex/db/__init__.py +++ b/pokedex/db/__init__.py @@ -25,7 +25,7 @@ def connect(uri=None, session_args={}, engine_args={}, engine_prefix=''): if uri is None: uri = get_default_db_uri() - ### Do some fixery for MySQL + ### Do some fixery for specific RDBMSes if uri.startswith('mysql:'): # MySQL uses latin1 for connections by default even if the server is # otherwise oozing with utf8; charset fixes this @@ -37,9 +37,7 @@ def connect(uri=None, session_args={}, engine_args={}, engine_prefix=''): for table in metadata.tables.values(): table.kwargs['mysql_engine'] = 'InnoDB' table.kwargs['mysql_charset'] = 'utf8' - - ### Do some fixery for Oracle - if uri.startswith('oracle:') or uri.startswith('oracle+cx_oracle:'): + elif uri.startswith(('oracle:', 'oracle+cx_oracle:')): # Oracle requires auto_setinputsizes=False (or at least a special # set of exclusions from it, which I don't know) if 'auto_setinputsizes' not in uri: diff --git a/pokedex/db/load.py b/pokedex/db/load.py index ee09721..01859a2 100644 --- a/pokedex/db/load.py +++ b/pokedex/db/load.py @@ -139,19 +139,16 @@ def load(session, tables=[], directory=None, drop_tables=False, verbose=False, s table_names = _get_table_names(metadata, tables) table_objs = [metadata.tables[name] for name in table_names] - # Oracle fixery, load needs short names - # flag for oracle stuff - oranames = (session.connection().dialect.name == 'oracle') - if oranames: - # Shorten table names, Oracle limits table and column names to 30 chars - # Make a dictionary to match old<->new names - oradict = rewrite_long_table_names() - if recursive: table_objs.extend(find_dependent_tables(table_objs)) table_objs = sqlalchemy.sql.util.sort_tables(table_objs) + # Limit table names to 30 characters for Oracle + oracle = (session.connection().dialect.name == 'oracle') + if oracle: + rewrite_long_table_names() + # SQLite speed tweaks if not safe and session.connection().dialect.name == 'sqlite': session.connection().execute("PRAGMA synchronous=OFF") @@ -186,15 +183,17 @@ def load(session, tables=[], directory=None, drop_tables=False, verbose=False, s # Okay, run through the tables and actually load the data now for table_obj in table_objs: - table_name = table_obj.name + if oracle: + table_name = table_obj._original_name + else: + table_name = table_obj.name + insert_stmt = table_obj.insert() print_start(table_name) + try: csvpath = "%s/%s.csv" % (directory, table_name) - # In oracle mode, use the original names instead of current - if oranames: - csvpath = "%s/%s.csv" % (directory, oradict[table_name]) csvfile = open(csvpath, 'rb') except IOError: # File doesn't exist; don't load anything! @@ -380,22 +379,23 @@ def dump(session, tables=[], directory=None, verbose=False, langs=['en']): table_names = _get_table_names(metadata, tables) table_names.sort() - # Oracle fixery : read from short table names, dump long names - oranames = (session.connection().dialect.name == 'oracle') - if oranames: - # Make a dictionary to match old<->new names - oradict = rewrite_long_table_names() + # Oracle needs to dump from tables with shortened names to csvs with the + # usual names + oracle = (session.connection().dialect.name == 'oracle') + if oracle: + rewrite_long_table_names() for table_name in table_names: print_start(table_name) table = metadata.tables[table_name] - writer = csv.writer(open("%s/%s.csv" % (directory, table_name), 'wb'), - lineterminator='\n') - # In oracle mode, use the original names instead of current - if oranames: - writer = csv.writer(open("%s/%s.csv" % (directory, oradict[table_name]), 'wb'), - lineterminator='\n') + if oracle: + filename = '%s/%s.csv' % (directory, table._original_name) + else: + filename = '%s/%s.csv' % (directory, table_name) + + writer = csv.writer(open(filename, 'wb'), lineterminator='\n') + columns = [col.name for col in table.columns] # For name tables, dump rows for official languages, as well as diff --git a/pokedex/db/oracle.py b/pokedex/db/oracle.py index 81b2105..ad9fd11 100644 --- a/pokedex/db/oracle.py +++ b/pokedex/db/oracle.py @@ -8,34 +8,19 @@ def rewrite_long_table_names(): Returns a dictionary matching short names -> long names. """ - # Load table names from metadata - t_names = metadata.tables.keys() - - table_names = list(t_names) - table_objs = [metadata.tables[name] for name in table_names] - - # Prepare a dictionary to match old<->new names - dictionary = {} + # Load tables from metadata + table_objs = metadata.tables.values() # Shorten table names, Oracle limits table and column names to 30 chars for table in table_objs: - table._orginal_name = table.name[:] - dictionary[table.name]=table._orginal_name - if len(table._orginal_name) > 30: - for letter in ['a', 'e', 'i', 'o', 'u', 'y']: - table.name=table.name.replace(letter,'') - dictionary[table.name]=table._orginal_name - return dictionary + table._original_name = table.name + if len(table.name) > 30: + for letter in 'aeiouy': + table.name = table.name.replace(letter, '') -def restore_long_table_names(metadata,dictionary): - """Modifies the table names to restore the long-naming. - - `metadata` - The metadata to restore. - - `dictionary` - The dictionary matching short name -> long name. - """ +def restore_long_table_names(): + """Modifies the table names to restore the long-naming.""" for table in metadata.tables.values(): - table.name = dictionary[table.name] + table.name = table._original_name + del table._original_name