Store media usn per-file

anki.media.MediaManager does not store a usn for each file, because the
client never needs one. The server does, so this commit stops using
MediaManager on the server and replaces it with a custom media manager.
This commit is contained in:
flan 2019-12-29 23:22:21 +01:00
parent ac1920d078
commit 10f47611bf
5 changed files with 122 additions and 64 deletions

View File

@ -1,6 +1,8 @@
import anki import anki
import anki.storage import anki.storage
import ankisyncd.media
import os, errno import os, errno
import logging import logging
@ -63,7 +65,13 @@ class CollectionWrapper:
return col return col
def _get_collection(self): def _get_collection(self):
return anki.storage.Collection(self.path) col = anki.storage.Collection(self.path)
# Ugly hack, replace default media manager with our custom one
col.media.close()
col.media = ankisyncd.media.ServerMediaManager(col)
return col
def open(self): def open(self):
"""Open the collection, or create it if it doesn't exist.""" """Open the collection, or create it if it doesn't exist."""

56
ankisyncd/media.py Normal file
View File

@ -0,0 +1,56 @@
# Based on anki.media.MediaManager, © Ankitects Pty Ltd and contributors
# License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
# Original source: https://raw.githubusercontent.com/dae/anki/62481ddc1aa78430cb8114cbf00a7739824318a8/anki/media.py
import re
import os
import os.path
import anki.db
class ServerMediaManager:
    """Server-side media manager that records a usn for every media file.

    State is kept in an SQLite database stored next to the media directory.
    Its single ``media`` table has one row per file: ``fname`` (primary key),
    ``usn`` and ``csum``; ``csum`` is NULL once the file has been deleted.
    """

    def __init__(self, col):
        """Derive the media directory from ``col.path`` and open the database.

        Args:
            col: collection object; only its ``path`` attribute is read.
        """
        # "<name>.anki2" -> "<name>.media"; the extension is matched
        # case-insensitively and only at the end of the path.
        self._dir = re.sub(r"(?i)\.(anki2)$", ".media", col.path)
        self.connect()

    def connect(self):
        """Open ``<media dir>.server.db``, creating the schema if it is new."""
        path = self.dir() + ".server.db"
        # TODO: migrate old db
        # Checked before the DB object is constructed, since opening the
        # database presumably creates the file -- TODO confirm.
        create = not os.path.exists(path)
        self.db = anki.db.DB(path)
        if create:
            self.db.executescript(
                """CREATE TABLE media (
                       fname TEXT NOT NULL PRIMARY KEY,
                       usn INT NOT NULL,
                       csum TEXT -- null if deleted
                   );
                   CREATE INDEX idx_media_usn ON media (usn);"""
            )

    def close(self):
        """Close the media database."""
        self.db.close()

    def dir(self):
        """Return the path of the media directory."""
        return self._dir

    def lastUsn(self):
        """Return the highest usn stored in the media table, or 0 if none."""
        return self.db.scalar("SELECT usn FROM media ORDER BY usn DESC LIMIT 1") or 0

    def mediaCount(self):
        """Return the number of media rows that are not marked as deleted."""
        return self.db.scalar("SELECT count() FROM media WHERE csum IS NOT NULL")

    # used only in unit tests
    def syncInfo(self, fname):
        """Return the first ``(csum, 0)`` row for ``fname`` from the database."""
        return self.db.first("SELECT csum, 0 FROM media WHERE fname=?", fname)

    def syncDelete(self, fname):
        """Remove ``fname`` from the media directory and mark it as deleted.

        A file that is already absent from disk is not an error. The existing
        database row for ``fname`` gets ``csum = NULL`` and a usn one above
        the current ``lastUsn()``. This is an UPDATE only, so no row is
        created for a name the database does not know. Changes are not
        committed here.

        Raises:
            OSError: if the file cannot be removed for a reason other than
                it not being there.
        """
        fpath = os.path.join(self.dir(), fname)
        # Attempt the removal directly instead of testing for existence
        # first: that avoids a race between the check and the removal.
        try:
            os.remove(fpath)
        except (FileNotFoundError, NotADirectoryError):
            pass
        self.db.execute(
            "UPDATE media SET csum = NULL, usn = ? WHERE fname = ?",
            self.lastUsn() + 1,
            fname,
        )

View File

@ -175,7 +175,7 @@ class SyncMediaHandler:
operations = ['begin', 'mediaChanges', 'mediaSanity', 'uploadChanges', 'downloadFiles'] operations = ['begin', 'mediaChanges', 'mediaSanity', 'uploadChanges', 'downloadFiles']
def __init__(self, col): def __init__(self, col):
anki.sync.MediaSyncer.__init__(self, col) self.col = col
def begin(self, skey): def begin(self, skey):
return { return {
@ -196,11 +196,6 @@ class SyncMediaHandler:
self._check_zip_data(z) self._check_zip_data(z)
processed_count = self._adopt_media_changes_from_zip(z) processed_count = self._adopt_media_changes_from_zip(z)
# We increment our lastUsn once for each file we processed.
# (lastUsn - processed_count) must equal the client's lastUsn.
our_last_usn = self.col.media.lastUsn()
self.col.media.setLastUsn(our_last_usn + processed_count)
return { return {
'data': [processed_count, self.col.media.lastUsn()], 'data': [processed_count, self.col.media.lastUsn()],
'err': '', 'err': '',
@ -238,6 +233,8 @@ class SyncMediaHandler:
# Add media files that were added on the client. # Add media files that were added on the client.
media_to_add = [] media_to_add = []
usn = self.col.media.lastUsn()
oldUsn = usn
for i in zip_file.infolist(): for i in zip_file.infolist():
if i.filename == "_meta": # Ignore previously retrieved metadata. if i.filename == "_meta": # Ignore previously retrieved metadata.
continue continue
@ -250,9 +247,9 @@ class SyncMediaHandler:
# Save file to media directory. # Save file to media directory.
with open(file_path, 'wb') as f: with open(file_path, 'wb') as f:
f.write(file_data) f.write(file_data)
mtime = self.col.media._mtime(file_path)
media_to_add.append((filename, csum, mtime, 0)) usn += 1
media_to_add.append((filename, usn, csum))
# We count all files we are to remove, even if we don't have them in # We count all files we are to remove, even if we don't have them in
# our media directory and our db doesn't know about them. # our media directory and our db doesn't know about them.
@ -265,8 +262,10 @@ class SyncMediaHandler:
if media_to_add: if media_to_add:
self.col.media.db.executemany( self.col.media.db.executemany(
"INSERT OR REPLACE INTO media VALUES (?,?,?,?)", media_to_add) "INSERT OR REPLACE INTO media VALUES (?,?,?)", media_to_add)
self.col.media.db.commit()
assert self.col.media.lastUsn() == oldUsn + processed_count # TODO: move to some unit test
return processed_count return processed_count
@staticmethod @staticmethod
@ -289,18 +288,11 @@ class SyncMediaHandler:
Marks all files in list filenames as deleted and removes them from the Marks all files in list filenames as deleted and removes them from the
media directory. media directory.
""" """
# Mark the files as deleted in our db.
self.col.media.db.executemany("UPDATE media " +
"SET csum = NULL " +
" WHERE fname = ?",
[(f, ) for f in filenames])
# Remove the files from our media directory if it is present.
logger.debug('Removing %d files from media dir.' % len(filenames)) logger.debug('Removing %d files from media dir.' % len(filenames))
for filename in filenames: for filename in filenames:
try: try:
os.remove(os.path.join(self.col.media.dir(), filename)) self.col.media.syncDelete(filename)
self.col.media.db.commit()
except OSError as err: except OSError as err:
logger.error("Error when removing file '%s' from media dir: " logger.error("Error when removing file '%s' from media dir: "
"%s" % (filename, str(err))) "%s" % (filename, str(err)))
@ -330,7 +322,7 @@ class SyncMediaHandler:
fname = csum = None fname = csum = None
if lastUsn < usn or lastUsn == 0: if lastUsn < usn or lastUsn == 0:
for fname,mtime,csum, in self.col.media.db.execute("select fname,mtime,csum from media"): for fname,usn,csum, in self.col.media.db.execute("select fname,usn,csum from media"):
result.append([fname, usn, csum]) result.append([fname, usn, csum])
return {'data': result, 'err': ''} return {'data': result, 'err': ''}

View File

@ -5,6 +5,8 @@ import os
import shutil import shutil
import tempfile import tempfile
import anki.utils
from ankisyncd.sync_app import SyncApp, SyncCollectionHandler, SyncMediaHandler from ankisyncd.sync_app import SyncApp, SyncCollectionHandler, SyncMediaHandler
@ -65,21 +67,24 @@ def get_syncer_for_hkey(server, hkey, syncer_type='collection'):
return session.get_handler_for_operation(handler_method, col) return session.get_handler_for_operation(handler_method, col)
def add_files_to_mediasyncer(media_syncer, filepaths, def add_files_to_client_mediadb(media, filepaths, update_db=False):
update_db=False, bump_last_usn=False):
"""
If bumpLastUsn is True, the media syncer's lastUsn will be incremented
once for each added file. Use this when adding files to the server.
"""
for filepath in filepaths: for filepath in filepaths:
logging.debug("Adding file '{}' to mediaSyncer".format(filepath)) logging.debug("Adding file '{}' to client media DB".format(filepath))
# Import file into media dir. # Import file into media dir.
media_syncer.col.media.addFile(filepath) media.addFile(filepath)
if bump_last_usn:
# Need to bump lastUsn once for each file.
media_manager = media_syncer.col.media
media_manager.setLastUsn(media_syncer.col.media.lastUsn() + 1)
if update_db: if update_db:
media_syncer.col.media.findChanges() # Write changes to db. media.findChanges() # Write changes to db.
def add_files_to_server_mediadb(media, filepaths):
    """Copy files into the server's media directory and record them in its DB.

    For every path in ``filepaths`` the file content is written under its
    base name into ``media.dir()``, and a row ``(fname, usn, csum)`` is
    inserted into the ``media`` table with a usn one above the current
    ``media.lastUsn()``. Each insert is committed straight away.
    """
    for src_path in filepaths:
        logging.debug("Adding file '{}' to server media DB".format(src_path))
        name = os.path.basename(src_path)

        with open(src_path, 'rb') as src:
            payload = src.read()
        digest = anki.utils.checksum(payload)

        target = os.path.join(media.dir(), name)
        with open(target, 'wb') as dst:
            dst.write(payload)

        next_usn = media.lastUsn() + 1
        media.db.execute("INSERT INTO media VALUES (?, ?, ?)", name, next_usn, digest)
        media.db.commit()

View File

@ -8,6 +8,7 @@ import shutil
import helpers.file_utils import helpers.file_utils
import helpers.server_utils import helpers.server_utils
import helpers.db_utils import helpers.db_utils
import anki.utils
from anki.sync import MediaSyncer from anki.sync import MediaSyncer
from helpers.mock_servers import MockRemoteMediaServer from helpers.mock_servers import MockRemoteMediaServer
from helpers.monkey_patches import monkeypatch_mediamanager, unpatch_mediamanager from helpers.monkey_patches import monkeypatch_mediamanager, unpatch_mediamanager
@ -41,6 +42,11 @@ class SyncAppFunctionalMediaTest(SyncAppFunctionalTestBase):
server=mock_remote_server) server=mock_remote_server)
return media_syncer return media_syncer
@staticmethod
def file_checksum(fname):
    """Return ``anki.utils.checksum`` of the raw bytes of the file ``fname``."""
    with open(fname, "rb") as handle:
        contents = handle.read()
    return anki.utils.checksum(contents)
def media_dbs_differ(self, left_db_path, right_db_path, compare_timestamps=False): def media_dbs_differ(self, left_db_path, right_db_path, compare_timestamps=False):
""" """
Compares two media sqlite database files for equality. mtime and dirMod Compares two media sqlite database files for equality. mtime and dirMod
@ -103,10 +109,7 @@ class SyncAppFunctionalMediaTest(SyncAppFunctionalTestBase):
temp_file_path = helpers.file_utils.create_named_file("foo.jpg", "hello") temp_file_path = helpers.file_utils.create_named_file("foo.jpg", "hello")
# Add the test file to the server's collection. # Add the test file to the server's collection.
helpers.server_utils.add_files_to_mediasyncer(server, helpers.server_utils.add_files_to_server_mediadb(server.col.media, [temp_file_path])
[temp_file_path],
update_db=True,
bump_last_usn=True)
# Syncing should work. # Syncing should work.
self.assertEqual(client.sync(), 'OK') self.assertEqual(client.sync(), 'OK')
@ -135,10 +138,9 @@ class SyncAppFunctionalMediaTest(SyncAppFunctionalTestBase):
temp_file_path = helpers.file_utils.create_named_file("foo.jpg", "hello") temp_file_path = helpers.file_utils.create_named_file("foo.jpg", "hello")
# Add the test file to the client's media collection. # Add the test file to the client's media collection.
helpers.server_utils.add_files_to_mediasyncer(client, helpers.server_utils.add_files_to_client_mediadb(client.col.media,
[temp_file_path], [temp_file_path],
update_db=True, update_db=True)
bump_last_usn=False)
# Syncing should work. # Syncing should work.
self.assertEqual(client.sync(), 'OK') self.assertEqual(client.sync(), 'OK')
@ -151,11 +153,12 @@ class SyncAppFunctionalMediaTest(SyncAppFunctionalTestBase):
# Further syncing should do nothing. # Further syncing should do nothing.
self.assertEqual(client.sync(), 'noChanges') self.assertEqual(client.sync(), 'noChanges')
# Except for timestamps, the media databases of client and server # The media data of client and server should be identical.
# should be identical. self.assertEqual(
self.assertFalse( list(client.col.media.db.execute("SELECT fname, csum FROM media")),
self.media_dbs_differ(client.col.media.db._path, server.col.media.db._path) list(server.col.media.db.execute("SELECT fname, csum FROM media"))
) )
self.assertEqual(client.col.media.lastUsn(), server.col.media.lastUsn())
def test_sync_different_files(self): def test_sync_different_files(self):
""" """
@ -174,13 +177,10 @@ class SyncAppFunctionalMediaTest(SyncAppFunctionalTestBase):
file_for_client = helpers.file_utils.create_named_file("foo.jpg", "hello") file_for_client = helpers.file_utils.create_named_file("foo.jpg", "hello")
file_for_server = helpers.file_utils.create_named_file("bar.jpg", "goodbye") file_for_server = helpers.file_utils.create_named_file("bar.jpg", "goodbye")
helpers.server_utils.add_files_to_mediasyncer(client, helpers.server_utils.add_files_to_client_mediadb(client.col.media,
[file_for_client], [file_for_client],
update_db=True) update_db=True)
helpers.server_utils.add_files_to_mediasyncer(server, helpers.server_utils.add_files_to_server_mediadb(server.col.media, [file_for_server])
[file_for_server],
update_db=True,
bump_last_usn=True)
# Syncing should work. # Syncing should work.
self.assertEqual(client.sync(), 'OK') self.assertEqual(client.sync(), 'OK')
@ -222,13 +222,10 @@ class SyncAppFunctionalMediaTest(SyncAppFunctionalTestBase):
file_for_client = helpers.file_utils.create_named_file("foo.jpg", "hello") file_for_client = helpers.file_utils.create_named_file("foo.jpg", "hello")
file_for_server = helpers.file_utils.create_named_file("foo.jpg", "goodbye") file_for_server = helpers.file_utils.create_named_file("foo.jpg", "goodbye")
helpers.server_utils.add_files_to_mediasyncer(client, helpers.server_utils.add_files_to_client_mediadb(client.col.media,
[file_for_client], [file_for_client],
update_db=True) update_db=True)
helpers.server_utils.add_files_to_mediasyncer(server, helpers.server_utils.add_files_to_server_mediadb(server.col.media, [file_for_server])
[file_for_server],
update_db=True,
bump_last_usn=True)
# Syncing should work. # Syncing should work.
self.assertEqual(client.sync(), 'OK') self.assertEqual(client.sync(), 'OK')
@ -267,10 +264,9 @@ class SyncAppFunctionalMediaTest(SyncAppFunctionalTestBase):
temp_file_path = helpers.file_utils.create_named_file("foo.jpg", "hello") temp_file_path = helpers.file_utils.create_named_file("foo.jpg", "hello")
# Add the test file to client's media collection. # Add the test file to client's media collection.
helpers.server_utils.add_files_to_mediasyncer(client, helpers.server_utils.add_files_to_client_mediadb(client.col.media,
[temp_file_path], [temp_file_path],
update_db=True, update_db=True)
bump_last_usn=False)
# Syncing client should work. # Syncing client should work.
self.assertEqual(client.sync(), 'OK') self.assertEqual(client.sync(), 'OK')
@ -314,7 +310,7 @@ class SyncAppFunctionalMediaTest(SyncAppFunctionalTestBase):
# findChanges(), only during syncs. # findChanges(), only during syncs.
support_file = helpers.file_utils.get_asset_path('blue.jpg') support_file = helpers.file_utils.get_asset_path('blue.jpg')
self.assertTrue(os.path.isfile(support_file)) self.assertTrue(os.path.isfile(support_file))
helpers.server_utils.add_files_to_mediasyncer(client, helpers.server_utils.add_files_to_client_mediadb(client.col.media,
[support_file], [support_file],
update_db=False) update_db=False)
@ -362,7 +358,7 @@ class SyncAppFunctionalMediaTest(SyncAppFunctionalTestBase):
server = helpers.server_utils.get_syncer_for_hkey(self.server_app, self.hkey, 'media') server = helpers.server_utils.get_syncer_for_hkey(self.server_app, self.hkey, 'media')
self.assertEqual(server.mediaChanges(lastUsn=client.col.media.lastUsn())['data'], []) self.assertEqual(server.mediaChanges(lastUsn=client.col.media.lastUsn())['data'], [])
helpers.server_utils.add_files_to_mediasyncer(client, [ helpers.server_utils.add_files_to_client_mediadb(client.col.media, [
helpers.file_utils.create_named_file("a", "lastUsn a"), helpers.file_utils.create_named_file("a", "lastUsn a"),
helpers.file_utils.create_named_file("b", "lastUsn b"), helpers.file_utils.create_named_file("b", "lastUsn b"),
helpers.file_utils.create_named_file("c", "lastUsn c"), helpers.file_utils.create_named_file("c", "lastUsn c"),
@ -374,17 +370,18 @@ class SyncAppFunctionalMediaTest(SyncAppFunctionalTestBase):
os.remove(os.path.join(client2.col.media.dir(), "c")) os.remove(os.path.join(client2.col.media.dir(), "c"))
client2.col.media._logChanges() client2.col.media._logChanges()
self.assertEqual(client2.sync(), "OK") self.assertEqual(client2.sync(), "OK")
server.col.media._logChanges()
self.assertEqual(server.mediaChanges(lastUsn=client.col.media.lastUsn())['data'], [['c', 4, None]]) self.assertEqual(server.mediaChanges(lastUsn=client.col.media.lastUsn())['data'], [['c', 4, None]])
self.assertEqual(client.sync(), "OK") self.assertEqual(client.sync(), "OK")
self.assertEqual(server.mediaChanges(lastUsn=client.col.media.lastUsn())['data'], []) self.assertEqual(server.mediaChanges(lastUsn=client.col.media.lastUsn())['data'], [])
helpers.server_utils.add_files_to_mediasyncer(client, [ helpers.server_utils.add_files_to_client_mediadb(client.col.media, [
helpers.file_utils.create_named_file("d", "lastUsn d"), helpers.file_utils.create_named_file("d", "lastUsn d"),
], update_db=True) ], update_db=True)
client.col.media._logChanges() client.col.media._logChanges()
self.assertEqual(client.sync(), "OK") self.assertEqual(client.sync(), "OK")
self.assertEqual(server.mediaChanges(lastUsn=client2.col.media.lastUsn())['data'], [['d', 5, server.col.media._checksum(os.path.join(server.col.media.dir(), "d"))]])
self.assertEqual(server.mediaChanges(lastUsn=client2.col.media.lastUsn())['data'], [['d', 5, self.file_checksum(os.path.join(server.col.media.dir(), "d"))]])
self.assertEqual(client2.sync(), "OK") self.assertEqual(client2.sync(), "OK")
self.assertEqual(server.mediaChanges(lastUsn=client2.col.media.lastUsn())['data'], []) self.assertEqual(server.mediaChanges(lastUsn=client2.col.media.lastUsn())['data'], [])
@ -395,6 +392,6 @@ class SyncAppFunctionalMediaTest(SyncAppFunctionalTestBase):
os.utime(dpath, (315529200, 315529200)) os.utime(dpath, (315529200, 315529200))
client.col.media._logChanges() client.col.media._logChanges()
self.assertEqual(client.sync(), "OK") self.assertEqual(client.sync(), "OK")
self.assertEqual(server.mediaChanges(lastUsn=client2.col.media.lastUsn())['data'], [['d', 6, server.col.media._checksum(os.path.join(server.col.media.dir(), "d"))]]) self.assertEqual(server.mediaChanges(lastUsn=client2.col.media.lastUsn())['data'], [['d', 6, self.file_checksum(os.path.join(server.col.media.dir(), "d"))]])
self.assertEqual(client2.sync(), "OK") self.assertEqual(client2.sync(), "OK")
self.assertEqual(server.mediaChanges(lastUsn=client2.col.media.lastUsn())['data'], []) self.assertEqual(server.mediaChanges(lastUsn=client2.col.media.lastUsn())['data'], [])