1
0
Fork 0
mirror of https://github.com/veekun/pokedex.git synced 2024-08-20 18:16:34 +00:00

Fixed a slew of foreign key import problems.

Curse's type_id was 0, which is bogus; this has been fixed by creating a
real ????? type.
Fourth-gen moves all had zero as a contest effect id, which was also
bogus.
Pokémon 494 and 495 were junk and have been scrapped entirely.
pokemon_form_groups's description column was too short.

pokedex's connect() now takes kwargs passed to sessionmaker().

A more major change: some tables, like pokemon, are self-referential and
contain rows that refer to rows later in the table (for example, Pikachu
evolves from Pichu, which has a higher id).  At the moment such a row is
loaded, the foreign key is thus bogus.  I solved this by turning on
autocommit and wrapping add() in a try block, then attempting to re-add
every failed row after the rest of the table is finished.  Slows
the import down a bit, but makes it work perfectly with foreign key
checks on.
This commit is contained in:
Eevee 2009-07-03 23:12:13 -04:00
parent 185264a288
commit 634ef3ed1e
9 changed files with 156 additions and 140 deletions

View file

@ -1,6 +1,7 @@
# encoding: utf8
import sys
from sqlalchemy.exc import IntegrityError
import sqlalchemy.types
from .db import connect, metadata, tables as tables_module
@ -25,7 +26,8 @@ def csvimport(engine_uri, directory='.'):
from sqlalchemy.orm.attributes import instrumentation_registry
session = connect(engine_uri)
# Use autocommit in case rows fail due to foreign key incest
session = connect(engine_uri, autocommit=True, autoflush=False)
metadata.create_all()
@ -59,6 +61,7 @@ def csvimport(engine_uri, directory='.'):
# Print the table name but leave the cursor in a fixed column
print table_name + '...', ' ' * (40 - len(table_name)),
sys.stdout.flush()
try:
csvfile = open("%s/%s.csv" % (directory, table_name), 'rb')
@ -70,6 +73,12 @@ def csvimport(engine_uri, directory='.'):
reader = csv.reader(csvfile, lineterminator='\n')
column_names = [unicode(column) for column in reader.next()]
# Self-referential tables may contain rows with foreign keys of
# other rows in the same table that do not yet exist. We'll keep
# a running list of these and try inserting them again after the
# rest are done
failed_rows = []
for csvs in reader:
row = table_class()
@ -91,11 +100,33 @@ def csvimport(engine_uri, directory='.'):
setattr(row, column_name, value)
session.add(row)
try:
session.add(row)
session.flush()
except IntegrityError as e:
failed_rows.append(row)
session.commit()
print 'loaded'
# Loop over the failed rows and keep trying to insert them. If a loop
# doesn't manage to insert any rows, bail.
do_another_loop = True
while failed_rows and do_another_loop:
do_another_loop = False
for i, row in enumerate(failed_rows):
try:
session.add(row)
session.flush()
# Success!
del failed_rows[i]
do_another_loop = True
except IntegrityError as e:
pass
if failed_rows:
print len(failed_rows), "rows failed"
else:
print 'loaded'
def csvexport(engine_uri, directory='.'):
import csv