summaryrefslogtreecommitdiff
path: root/fbin-backup.py
diff options
context:
space:
mode:
authorJon Bergli Heier <snakebite@jvnv.net>2020-10-28 20:14:05 +0100
committerJon Bergli Heier <snakebite@jvnv.net>2020-10-28 20:14:05 +0100
commitb1ed551c3125278d14a69750fea2bfe39cf68530 (patch)
treefe7a89bd5bea3d53b18009a55e29b9f4181ca600 /fbin-backup.py
parent0faa732c9a3e1ffced2b26bee682f513b0e5f0ae (diff)
Add fbin-backup.py
This is a backup script which will copy files to a target storage, and optionally add database entries as well. It works by specifying a source and a target config file. Both should have separate storage configured, and files will be copied from the source storage to the target storage. The list of files to copy is read from the source database, which means this must be accessible from where the script is run, not just the storage. If updating database entries, the target database must also be accessible. The target database is also checked for any existing file hashes, and any files that would cause a collision will be skipped.
Diffstat (limited to 'fbin-backup.py')
-rw-r--r--fbin-backup.py90
1 files changed, 90 insertions, 0 deletions
diff --git a/fbin-backup.py b/fbin-backup.py
new file mode 100644
index 0000000..5ef3a87
--- /dev/null
+++ b/fbin-backup.py
@@ -0,0 +1,90 @@
+import argparse
+import importlib
+import os
+
+from flask import Flask, Response
+
+from fbin.db import db, User, File
+
# Command-line interface.  Parsing happens at module level because ``args`` is
# read as a global further down (config file paths, --dry-run, --update-db).
parser = argparse.ArgumentParser(
    description='Copy files from a source storage to a target storage, '
                'optionally adding database entries on the target as well.',
)
parser.add_argument('-s', '--source-config-file', default='fbin/fbin.cfg',
                    help='Config file describing the source (default: %(default)s)')
parser.add_argument('-t', '--target-config-file', required=True,
                    help='Config file describing the target')
parser.add_argument('--update-db', action='store_true', help='Update DB as well')
parser.add_argument('-n', '--dry-run', action='store_true', help='Do not update anything')
args = parser.parse_args()
+
class DummyFile:
    """Minimal file-upload look-alike built around a readable stream.

    Exposes ``stream``, ``filename`` and ``content_length`` attributes plus a
    ``save`` method.  Instances are handed to the target storage's
    ``upload_file``/``store_file`` in ``main``.
    NOTE(review): presumably mirrors the upload object the storage backends
    normally receive from Flask -- confirm against the storage modules.
    """

    def __init__(self, stream, filename, content_length):
        """Store the stream and the metadata reported for it.

        stream: object with a ``read(size)`` method that returns the next
            chunk, or a falsy value once the data is exhausted.
        filename: name reported for the file.
        content_length: size reported for the file.
        """
        self.stream = stream
        self.filename = filename
        self.content_length = content_length

    def save(self, fp, buffer_size=10*1024):
        """Copy everything left in ``self.stream`` into ``fp``.

        fp: object with a ``write(chunk)`` method.
        buffer_size: size passed to every ``stream.read`` call; defaults to
            the previously hard-coded 10 KiB.

        Raises ValueError if ``buffer_size`` is not positive, since a size of
        zero would end the copy immediately without writing anything.
        """
        if buffer_size < 1:
            raise ValueError('buffer_size must be a positive integer')
        chunk = self.stream.read(buffer_size)
        # Any falsy chunk (b'', '' or None) marks the end of the stream.
        while chunk:
            fp.write(chunk)
            chunk = self.stream.read(buffer_size)
+
class IterStream:
    """Adapt an iterable of bytes chunks to a file-like ``read`` interface.

    ``main`` wraps the body iterator of a ``Response`` in this class so it can
    be consumed like an open binary file.
    """

    # Unique marker for "iterator exhausted"; never equal to a real chunk.
    _EXHAUSTED = object()

    def __init__(self, it):
        """Wrap ``it``, any iterable (or iterator) yielding bytes chunks."""
        self._it = iter(it)
        # Bytes already pulled from the iterator but not yet returned.
        self._buffer = b''

    def read(self, n=-1):
        """Return up to ``n`` bytes, or all remaining data if ``n`` is negative/None.

        Chunks are gathered until ``n`` bytes are available or the iterator is
        exhausted, so an empty chunk in the middle of the stream is not
        mistaken for end-of-data.  Returns ``b''`` once everything has been
        read (still falsy, so read-until-falsy loops keep working).
        """
        if n is None or n < 0:
            data = self._buffer + b''.join(self._it)
            self._buffer = b''
            return data

        parts = [self._buffer]
        available = len(self._buffer)
        while available < n:
            chunk = next(self._it, self._EXHAUSTED)
            if chunk is self._EXHAUSTED:
                break
            parts.append(chunk)
            available += len(chunk)

        data = b''.join(parts)
        # Keep whatever exceeds the request for the next call.
        self._buffer = data[n:]
        return data[:n]
+
def main():
    """Copy files present in the source storage but missing from the target.

    Uses the module-level ``args``, ``source_app`` and ``target_app``.

    1. Lists every ``File`` row of the source database and selects those that
       exist in the source storage and not in the target storage.
    2. With ``--dry-run``, only prints what would be copied.
    3. Otherwise fetches each selected file from the source storage and hands
       it to the target storage: ``store_file`` when ``--update-db`` is given
       (skipping hashes already present in the target database), else
       ``upload_file``.
    """
    # The backend of each side comes from its own STORAGE_MODULE setting,
    # resolved relative to the ``fbin`` package.
    source_storage = importlib.import_module(
        source_app.config.get('STORAGE_MODULE', '.file_storage.filesystem'),
        package='fbin').Storage(source_app)
    target_storage = importlib.import_module(
        target_app.config.get('STORAGE_MODULE', '.file_storage.filesystem'),
        package='fbin').Storage(target_app)

    # Register each app with the shared ``db`` object once, instead of once
    # per file; repeated registration of the same app is redundant and is
    # rejected by newer Flask-SQLAlchemy releases.
    db.init_app(source_app)

    copy_list = []
    with source_app.app_context():
        print('Finding existing files')
        for f in db.session.query(File).all():
            if f.user:
                # Load the related user now, while the source session is
                # still available; it is passed to the target storage later.
                db.session.refresh(f.user)
            if source_storage.file_exists(f) and not target_storage.file_exists(f):
                print('COPY: ', end='')
                copy_list.append(f)
            else:
                print('SKIP: ', end='')
            print(f.hash, f.filename)

    if not copy_list:
        print('No valid files found')
        return

    print('Copying {} files'.format(len(copy_list)))
    if args.dry_run:
        for f in copy_list:
            print('Would copy', f.hash, f.filename)
        return

    # Only needed once something is actually written to the target.
    db.init_app(target_app)

    for f in copy_list:
        print('Copying', f.hash, f.filename)
        opened = None  # file object opened here, which must be closed here
        with source_app.app_context():
            with source_app.test_request_context():
                source = source_storage.get_file(f)
                if isinstance(source, str) and os.path.exists(source):
                    # The storage returned a filesystem path.
                    source = opened = open(source, 'rb')
                elif isinstance(source, Response):
                    # The storage returned a response; stream its body.
                    source = IterStream(source.get_app_iter({'REQUEST_METHOD': 'GET'}))
                df = DummyFile(source, f.filename, f.size)
        try:
            with target_app.app_context():
                if not args.update_db:
                    target_storage.upload_file(df, f.hash, f.user)
                # first() returns None when no row matches; one() would raise
                # NoResultFound for every hash that is new to the target.
                elif db.session.query(File).filter(File.hash == f.hash).first() is not None:
                    print(' Cannot copy this file; hash already exists in target DB')
                else:
                    target_storage.store_file(df, f.hash, f.user, f.ip)
        finally:
            if opened is not None:
                opened.close()
+
# One bare Flask app per side; each loads its own config file, source first,
# before the backup is started.
source_app = Flask('source')
target_app = Flask('target')
for _app, _config_file in (
    (source_app, args.source_config_file),
    (target_app, args.target_config_file),
):
    with _app.app_context():
        _app.config.from_pyfile(_config_file)
main()