Fixed a slew of foreign key import problems. #29

Curse's type_id was 0, which is bogus; this has been fixed by creating a real ????? type. Fourth-gen moves all had zero as a contest effect id, which was also bogus. Pokémon 494 and 495 were junk and have been scrapped entirely. pokemon_form_groups's description column was too short. pokedex's connect() now takes kwargs passed to sessionmaker(). A more major change: some tables, like pokemon, are self-referential and contain rows that refer to rows later in the table (for example, Pikachu evolves from Pichu, which has a higher id). At the moment such a row is loaded, the foreign key is thus bogus. I solved this by turning on autocommit and wrapping add() in a try block, then attempting to re-add every failed row again after the rest of the table is finished. Slows the import down a bit, but makes it work perfectly with foreign key checks on.
2024-08-20 18:16:34 +00:00 · 2009-07-03 23:12:13 -04:00 · 2009-07-03 23:12:13 -04:00 · 634ef3ed1e
commit 634ef3ed1e
parent 185264a288
9 changed files with 156 additions and 140 deletions
--- a/pokedex/init.py
+++ b/pokedex/init.py
@ -1,6 +1,7 @@
 # encoding: utf8
 import sys

+from sqlalchemy.exc import IntegrityError
 import sqlalchemy.types

 from .db import connect, metadata, tables as tables_module
@ -25,7 +26,8 @@ def csvimport(engine_uri, directory='.'):

    from sqlalchemy.orm.attributes import instrumentation_registry

-    session = connect(engine_uri)
+    # Use autocommit in case rows fail due to foreign key incest
+    session = connect(engine_uri, autocommit=True, autoflush=False)

    metadata.create_all()

@ -59,6 +61,7 @@ def csvimport(engine_uri, directory='.'):

        # Print the table name but leave the cursor in a fixed column
        print table_name + '...', ' ' * (40 - len(table_name)),
+        sys.stdout.flush()

        try:
            csvfile = open("%s/%s.csv" % (directory, table_name), 'rb')
@ -70,6 +73,12 @@ def csvimport(engine_uri, directory='.'):
        reader = csv.reader(csvfile, lineterminator='\n')
        column_names = [unicode(column) for column in reader.next()]

+        # Self-referential tables may contain rows with foreign keys of
+        # other rows in the same table that do not yet exist.  We'll keep
+        # a running list of these and try inserting them again after the
+        # rest are done
+        failed_rows = []
+
        for csvs in reader:
            row = table_class()

@ -91,11 +100,33 @@ def csvimport(engine_uri, directory='.'):

                setattr(row, column_name, value)

-            session.add(row)
+            try:
+                session.add(row)
+                session.flush()
+            except IntegrityError as e:
+                failed_rows.append(row)

-        session.commit()
-        print 'loaded'
+        # Loop over the failed rows and keep trying to insert them.  If a loop
+        # doesn't manage to insert any rows, bail.
+        do_another_loop = True
+        while failed_rows and do_another_loop:
+            do_another_loop = False

+            for i, row in enumerate(failed_rows):
+                try:
+                    session.add(row)
+                    session.flush()
+
+                    # Success!
+                    del failed_rows[i]
+                    do_another_loop = True
+                except IntegrityError as e:
+                    pass
+
+        if failed_rows:
+            print len(failed_rows), "rows failed"
+        else:
+            print 'loaded'

 def csvexport(engine_uri, directory='.'):
    import csv