From 10f47611bf829d9e081a1213f89bbf4b25b8d59a Mon Sep 17 00:00:00 2001 From: flan Date: Sun, 29 Dec 2019 23:22:21 +0100 Subject: [PATCH] Store media usn per-file Since anki.media.MediaManager does not store per-file usn (as it's not really needed for anything in the client), this requires us to drop it and implement a custom media manager. --- ankisyncd/collection.py | 10 +++++- ankisyncd/media.py | 56 ++++++++++++++++++++++++++++++++++ ankisyncd/sync_app.py | 30 +++++++----------- tests/helpers/server_utils.py | 33 +++++++++++--------- tests/test_web_media.py | 57 +++++++++++++++++------------------ 5 files changed, 122 insertions(+), 64 deletions(-) create mode 100644 ankisyncd/media.py diff --git a/ankisyncd/collection.py b/ankisyncd/collection.py index 4603f4b..e32dbfe 100644 --- a/ankisyncd/collection.py +++ b/ankisyncd/collection.py @@ -1,6 +1,8 @@ import anki import anki.storage +import ankisyncd.media + import os, errno import logging @@ -63,7 +65,13 @@ class CollectionWrapper: return col def _get_collection(self): - return anki.storage.Collection(self.path) + col = anki.storage.Collection(self.path) + + # Ugly hack, replace default media manager with our custom one + col.media.close() + col.media = ankisyncd.media.ServerMediaManager(col) + + return col def open(self): """Open the collection, or create it if it doesn't exist.""" diff --git a/ankisyncd/media.py b/ankisyncd/media.py new file mode 100644 index 0000000..1d3356b --- /dev/null +++ b/ankisyncd/media.py @@ -0,0 +1,56 @@ +# Based on anki.media.MediaManager, © Ankitects Pty Ltd and contributors +# License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html +# Original source: https://raw.githubusercontent.com/dae/anki/62481ddc1aa78430cb8114cbf00a7739824318a8/anki/media.py + +import re +import os +import os.path + +import anki.db + + +class ServerMediaManager: + def __init__(self, col): + self._dir = re.sub(r"(?i)\.(anki2)$", ".media", col.path) + self.connect() + + def connect(self): + path = self.dir() + ".server.db" + # TODO: migrate old db + create = not os.path.exists(path) + self.db = anki.db.DB(path) + if create: + self.db.executescript( + """CREATE TABLE media ( + fname TEXT NOT NULL PRIMARY KEY, + usn INT NOT NULL, + csum TEXT -- null if deleted + ); + CREATE INDEX idx_media_usn ON media (usn);""" + ) + + def close(self): + self.db.close() + + def dir(self): + return self._dir + + def lastUsn(self): + return self.db.scalar("SELECT usn FROM media ORDER BY usn DESC LIMIT 1") or 0 + + def mediaCount(self): + return self.db.scalar("SELECT count() FROM media WHERE csum IS NOT NULL") + + # used only in unit tests + def syncInfo(self, fname): + return self.db.first("SELECT csum, 0 FROM media WHERE fname=?", fname) + + def syncDelete(self, fname): + fpath = os.path.join(self.dir(), fname) + if os.path.exists(fpath): + os.remove(fpath) + self.db.execute( + "UPDATE media SET csum = NULL, usn = ? WHERE fname = ?", + self.lastUsn() + 1, + fname, + ) diff --git a/ankisyncd/sync_app.py b/ankisyncd/sync_app.py index e2666e1..9553c24 100644 --- a/ankisyncd/sync_app.py +++ b/ankisyncd/sync_app.py @@ -175,7 +175,7 @@ class SyncMediaHandler: operations = ['begin', 'mediaChanges', 'mediaSanity', 'uploadChanges', 'downloadFiles'] def __init__(self, col): - anki.sync.MediaSyncer.__init__(self, col) + self.col = col def begin(self, skey): return { @@ -196,11 +196,6 @@ class SyncMediaHandler: self._check_zip_data(z) processed_count = self._adopt_media_changes_from_zip(z) - # We increment our lastUsn once for each file we processed. - # (lastUsn - processed_count) must equal the client's lastUsn. - our_last_usn = self.col.media.lastUsn() - self.col.media.setLastUsn(our_last_usn + processed_count) - return { 'data': [processed_count, self.col.media.lastUsn()], 'err': '', @@ -238,6 +233,8 @@ class SyncMediaHandler: # Add media files that were added on the client. media_to_add = [] + usn = self.col.media.lastUsn() + oldUsn = usn for i in zip_file.infolist(): if i.filename == "_meta": # Ignore previously retrieved metadata. continue @@ -250,9 +247,9 @@ class SyncMediaHandler: # Save file to media directory. with open(file_path, 'wb') as f: f.write(file_data) - mtime = self.col.media._mtime(file_path) - media_to_add.append((filename, csum, mtime, 0)) + usn += 1 + media_to_add.append((filename, usn, csum)) # We count all files we are to remove, even if we don't have them in # our media directory and our db doesn't know about them. @@ -265,8 +262,10 @@ class SyncMediaHandler: if media_to_add: self.col.media.db.executemany( - "INSERT OR REPLACE INTO media VALUES (?,?,?,?)", media_to_add) + "INSERT OR REPLACE INTO media VALUES (?,?,?)", media_to_add) + self.col.media.db.commit() + assert self.col.media.lastUsn() == oldUsn + processed_count # TODO: move to some unit test return processed_count @staticmethod @@ -289,18 +288,11 @@ class SyncMediaHandler: Marks all files in list filenames as deleted and removes them from the media directory. """ - - # Mark the files as deleted in our db. - self.col.media.db.executemany("UPDATE media " + - "SET csum = NULL " + - " WHERE fname = ?", - [(f, ) for f in filenames]) - - # Remove the files from our media directory if it is present. logger.debug('Removing %d files from media dir.' % len(filenames)) for filename in filenames: try: - os.remove(os.path.join(self.col.media.dir(), filename)) + self.col.media.syncDelete(filename) + self.col.media.db.commit() except OSError as err: logger.error("Error when removing file '%s' from media dir: " "%s" % (filename, str(err))) @@ -330,7 +322,7 @@ class SyncMediaHandler: fname = csum = None if lastUsn < usn or lastUsn == 0: - for fname,mtime,csum, in self.col.media.db.execute("select fname,mtime,csum from media"): + for fname,usn,csum, in self.col.media.db.execute("select fname,usn,csum from media"): result.append([fname, usn, csum]) return {'data': result, 'err': ''} diff --git a/tests/helpers/server_utils.py b/tests/helpers/server_utils.py index 33c1772..fed41ac 100644 --- a/tests/helpers/server_utils.py +++ b/tests/helpers/server_utils.py @@ -5,6 +5,8 @@ import os import shutil import tempfile +import anki.utils + from ankisyncd.sync_app import SyncApp, SyncCollectionHandler, SyncMediaHandler @@ -65,21 +67,24 @@ def get_syncer_for_hkey(server, hkey, syncer_type='collection'): return session.get_handler_for_operation(handler_method, col) -def add_files_to_mediasyncer(media_syncer, filepaths, - update_db=False, bump_last_usn=False): - """ - If bumpLastUsn is True, the media syncer's lastUsn will be incremented - once for each added file. Use this when adding files to the server. - """ - +def add_files_to_client_mediadb(media, filepaths, update_db=False): for filepath in filepaths: - logging.debug("Adding file '{}' to mediaSyncer".format(filepath)) + logging.debug("Adding file '{}' to client media DB".format(filepath)) # Import file into media dir. - media_syncer.col.media.addFile(filepath) - if bump_last_usn: - # Need to bump lastUsn once for each file. - media_manager = media_syncer.col.media - media_manager.setLastUsn(media_syncer.col.media.lastUsn() + 1) + media.addFile(filepath) if update_db: - media_syncer.col.media.findChanges() # Write changes to db. + media.findChanges() # Write changes to db. + +def add_files_to_server_mediadb(media, filepaths): + for filepath in filepaths: + logging.debug("Adding file '{}' to server media DB".format(filepath)) + fname = os.path.basename(filepath) + with open(filepath, 'rb') as infile: + data = infile.read() + csum = anki.utils.checksum(data) + + with open(os.path.join(media.dir(), fname), 'wb') as f: + f.write(data) + media.db.execute("INSERT INTO media VALUES (?, ?, ?)", fname, media.lastUsn() + 1, csum) + media.db.commit() diff --git a/tests/test_web_media.py b/tests/test_web_media.py index ca59d68..be7c746 100644 --- a/tests/test_web_media.py +++ b/tests/test_web_media.py @@ -8,6 +8,7 @@ import shutil import helpers.file_utils import helpers.server_utils import helpers.db_utils +import anki.utils from anki.sync import MediaSyncer from helpers.mock_servers import MockRemoteMediaServer from helpers.monkey_patches import monkeypatch_mediamanager, unpatch_mediamanager @@ -41,6 +42,11 @@ class SyncAppFunctionalMediaTest(SyncAppFunctionalTestBase): server=mock_remote_server) return media_syncer + @staticmethod + def file_checksum(fname): + with open(fname, "rb") as f: + return anki.utils.checksum(f.read()) + def media_dbs_differ(self, left_db_path, right_db_path, compare_timestamps=False): """ Compares two media sqlite database files for equality. mtime and dirMod @@ -103,10 +109,7 @@ class SyncAppFunctionalMediaTest(SyncAppFunctionalTestBase): temp_file_path = helpers.file_utils.create_named_file("foo.jpg", "hello") # Add the test file to the server's collection. - helpers.server_utils.add_files_to_mediasyncer(server, - [temp_file_path], - update_db=True, - bump_last_usn=True) + helpers.server_utils.add_files_to_server_mediadb(server.col.media, [temp_file_path]) # Syncing should work. self.assertEqual(client.sync(), 'OK') @@ -135,10 +138,9 @@ class SyncAppFunctionalMediaTest(SyncAppFunctionalTestBase): temp_file_path = helpers.file_utils.create_named_file("foo.jpg", "hello") # Add the test file to the client's media collection. - helpers.server_utils.add_files_to_mediasyncer(client, + helpers.server_utils.add_files_to_client_mediadb(client.col.media, [temp_file_path], - update_db=True, - bump_last_usn=False) + update_db=True) # Syncing should work. self.assertEqual(client.sync(), 'OK') @@ -151,11 +153,12 @@ class SyncAppFunctionalMediaTest(SyncAppFunctionalTestBase): # Further syncing should do nothing. self.assertEqual(client.sync(), 'noChanges') - # Except for timestamps, the media databases of client and server - # should be identical. - self.assertFalse( - self.media_dbs_differ(client.col.media.db._path, server.col.media.db._path) + # The media data of client and server should be identical. + self.assertEqual( + list(client.col.media.db.execute("SELECT fname, csum FROM media")), + list(server.col.media.db.execute("SELECT fname, csum FROM media")) ) + self.assertEqual(client.col.media.lastUsn(), server.col.media.lastUsn()) def test_sync_different_files(self): """ @@ -174,13 +177,10 @@ class SyncAppFunctionalMediaTest(SyncAppFunctionalTestBase): file_for_client = helpers.file_utils.create_named_file("foo.jpg", "hello") file_for_server = helpers.file_utils.create_named_file("bar.jpg", "goodbye") - helpers.server_utils.add_files_to_mediasyncer(client, + helpers.server_utils.add_files_to_client_mediadb(client.col.media, [file_for_client], update_db=True) - helpers.server_utils.add_files_to_mediasyncer(server, - [file_for_server], - update_db=True, - bump_last_usn=True) + helpers.server_utils.add_files_to_server_mediadb(server.col.media, [file_for_server]) # Syncing should work. self.assertEqual(client.sync(), 'OK') @@ -222,13 +222,10 @@ class SyncAppFunctionalMediaTest(SyncAppFunctionalTestBase): file_for_client = helpers.file_utils.create_named_file("foo.jpg", "hello") file_for_server = helpers.file_utils.create_named_file("foo.jpg", "goodbye") - helpers.server_utils.add_files_to_mediasyncer(client, + helpers.server_utils.add_files_to_client_mediadb(client.col.media, [file_for_client], update_db=True) - helpers.server_utils.add_files_to_mediasyncer(server, - [file_for_server], - update_db=True, - bump_last_usn=True) + helpers.server_utils.add_files_to_server_mediadb(server.col.media, [file_for_server]) # Syncing should work. self.assertEqual(client.sync(), 'OK') @@ -267,10 +264,9 @@ class SyncAppFunctionalMediaTest(SyncAppFunctionalTestBase): temp_file_path = helpers.file_utils.create_named_file("foo.jpg", "hello") # Add the test file to client's media collection. - helpers.server_utils.add_files_to_mediasyncer(client, + helpers.server_utils.add_files_to_client_mediadb(client.col.media, [temp_file_path], - update_db=True, - bump_last_usn=False) + update_db=True) # Syncing client should work. self.assertEqual(client.sync(), 'OK') @@ -314,7 +310,7 @@ class SyncAppFunctionalMediaTest(SyncAppFunctionalTestBase): # findChanges(), only during syncs. support_file = helpers.file_utils.get_asset_path('blue.jpg') self.assertTrue(os.path.isfile(support_file)) - helpers.server_utils.add_files_to_mediasyncer(client, + helpers.server_utils.add_files_to_client_mediadb(client.col.media, [support_file], update_db=False) @@ -362,7 +358,7 @@ class SyncAppFunctionalMediaTest(SyncAppFunctionalTestBase): server = helpers.server_utils.get_syncer_for_hkey(self.server_app, self.hkey, 'media') self.assertEqual(server.mediaChanges(lastUsn=client.col.media.lastUsn())['data'], []) - helpers.server_utils.add_files_to_mediasyncer(client, [ + helpers.server_utils.add_files_to_client_mediadb(client.col.media, [ helpers.file_utils.create_named_file("a", "lastUsn a"), helpers.file_utils.create_named_file("b", "lastUsn b"), helpers.file_utils.create_named_file("c", "lastUsn c"), @@ -374,17 +370,18 @@ class SyncAppFunctionalMediaTest(SyncAppFunctionalTestBase): os.remove(os.path.join(client2.col.media.dir(), "c")) client2.col.media._logChanges() self.assertEqual(client2.sync(), "OK") - server.col.media._logChanges() self.assertEqual(server.mediaChanges(lastUsn=client.col.media.lastUsn())['data'], [['c', 4, None]]) self.assertEqual(client.sync(), "OK") self.assertEqual(server.mediaChanges(lastUsn=client.col.media.lastUsn())['data'], []) - helpers.server_utils.add_files_to_mediasyncer(client, [ + helpers.server_utils.add_files_to_client_mediadb(client.col.media, [ helpers.file_utils.create_named_file("d", "lastUsn d"), ], update_db=True) client.col.media._logChanges() self.assertEqual(client.sync(), "OK") - self.assertEqual(server.mediaChanges(lastUsn=client2.col.media.lastUsn())['data'], [['d', 5, server.col.media._checksum(os.path.join(server.col.media.dir(), "d"))]]) + + self.assertEqual(server.mediaChanges(lastUsn=client2.col.media.lastUsn())['data'], [['d', 5, self.file_checksum(os.path.join(server.col.media.dir(), "d"))]]) + self.assertEqual(client2.sync(), "OK") self.assertEqual(server.mediaChanges(lastUsn=client2.col.media.lastUsn())['data'], []) @@ -395,6 +392,6 @@ class SyncAppFunctionalMediaTest(SyncAppFunctionalTestBase): os.utime(dpath, (315529200, 315529200)) client.col.media._logChanges() self.assertEqual(client.sync(), "OK") - self.assertEqual(server.mediaChanges(lastUsn=client2.col.media.lastUsn())['data'], [['d', 6, server.col.media._checksum(os.path.join(server.col.media.dir(), "d"))]]) + self.assertEqual(server.mediaChanges(lastUsn=client2.col.media.lastUsn())['data'], [['d', 6, self.file_checksum(os.path.join(server.col.media.dir(), "d"))]]) self.assertEqual(client2.sync(), "OK") self.assertEqual(server.mediaChanges(lastUsn=client2.col.media.lastUsn())['data'], [])