diff options
-rw-r--r-- | fbin-backup.py | 90 | ||||
-rw-r--r-- | fbin/file_storage/base.py | 8 | ||||
-rw-r--r-- | fbin/file_storage/filesystem.py | 24 | ||||
-rw-r--r-- | fbin/file_storage/s3.py | 18 |
4 files changed, 131 insertions, 9 deletions
diff --git a/fbin-backup.py b/fbin-backup.py
new file mode 100644
index 0000000..5ef3a87
--- /dev/null
+++ b/fbin-backup.py
@@ -0,0 +1,117 @@
+import argparse
+import importlib
+import os
+
+from flask import Flask, Response
+
+from fbin.db import db, User, File
+
+parser = argparse.ArgumentParser()
+parser.add_argument('-s', '--source-config-file', default='fbin/fbin.cfg')
+parser.add_argument('-t', '--target-config-file', required=True)
+parser.add_argument('--update-db', action='store_true', help='Update DB as well')
+parser.add_argument('-n', '--dry-run', action='store_true', help='Do not update anything')
+args = parser.parse_args()
+
+class DummyFile:
+    '''Minimal stand-in for an uploaded file, as passed to the storage backends.'''
+
+    def __init__(self, stream, filename, content_length):
+        self.stream = stream
+        self.filename = filename
+        self.content_length = content_length
+
+    def save(self, fp):
+        '''Copy the remaining stream contents to fp in 10 KiB reads.'''
+        chunk = self.stream.read(10*1024)
+        while chunk:
+            fp.write(chunk)
+            chunk = self.stream.read(10*1024)
+
+class IterStream:
+    '''Expose an iterable of bytes chunks through a file-like read().'''
+
+    def __init__(self, it):
+        # iter() so plain iterables are accepted, not only iterators.
+        self._it = iter(it)
+        self._buf = b''
+
+    def read(self, n=-1):
+        '''Return up to n bytes, or everything left if n is None/negative; b'' at EOF.'''
+        if n is None or n < 0:
+            data = self._buf + b''.join(self._it)
+            self._buf = b''
+            return data
+        # Collect chunks until n bytes are available or the source is exhausted;
+        # whatever exceeds n is kept for the next call.
+        parts = [self._buf]
+        size = len(self._buf)
+        while size < n:
+            chunk = next(self._it, None)
+            if chunk is None:
+                break
+            parts.append(chunk)
+            size += len(chunk)
+        data = b''.join(parts)
+        self._buf = data[n:]
+        return data[:n]
+
+def main():
+    '''Copy files that exist in the source storage but not in the target storage.'''
+    source_storage = importlib.import_module(source_app.config.get('STORAGE_MODULE', '.file_storage.filesystem'), package='fbin').Storage(source_app)
+    target_storage = importlib.import_module(target_app.config.get('STORAGE_MODULE', '.file_storage.filesystem'), package='fbin').Storage(target_app)
+    copy_list = []
+    with source_app.app_context():
+        db.init_app(source_app)
+        print('Finding existing files')
+        for f in db.session.query(File).all():
+            if f.user:
+                db.session.refresh(f.user)
+            if source_storage.file_exists(f) and not target_storage.file_exists(f):
+                print('COPY: ', end='')
+                copy_list.append(f)
+            else:
+                print('SKIP: ', end='')
+            print(f.hash, f.filename)
+    if not copy_list:
+        print('No valid files found')
+        return
+    print('Copying {} files'.format(len(copy_list)))
+    for f in copy_list:
+        if args.dry_run:
+            print('Would copy', f.hash, f.filename)
+            continue
+        print('Copying', f.hash, f.filename)
+        opened = None
+        with source_app.app_context():
+            db.init_app(source_app)
+            with source_app.test_request_context():
+                source = source_storage.get_file(f)
+                if isinstance(source, str) and os.path.exists(source):
+                    source = opened = open(source, 'rb')
+                elif isinstance(source, Response):
+                    source = IterStream(source.get_app_iter({'REQUEST_METHOD': 'GET'}))
+                df = DummyFile(source, f.filename, f.size)
+        try:
+            with target_app.app_context():
+                db.init_app(target_app)
+                if args.update_db:
+                    # first() yields None for an unknown hash; one() would raise NoResultFound.
+                    if db.session.query(File).filter(File.hash == f.hash).first():
+                        print(' Cannot copy this file; hash already exists in target DB')
+                    else:
+                        target_storage.store_file(df, f.hash, f.user, f.ip)
+                else:
+                    target_storage.upload_file(df, f.hash, f.user)
+        finally:
+            # Close the handle opened for this file so descriptors do not pile up.
+            if opened is not None:
+                opened.close()
+
+source_app = Flask('source')
+target_app = Flask('target')
+with source_app.app_context():
+    source_app.config.from_pyfile(args.source_config_file)
+with target_app.app_context():
+    target_app.config.from_pyfile(args.target_config_file)
+main()
diff --git a/fbin/file_storage/base.py b/fbin/file_storage/base.py
index abdf580..aa2c510 100644
--- a/fbin/file_storage/base.py
+++ b/fbin/file_storage/base.py
@@ -13,6 +13,10 @@ class BaseStorage:
         if size_limit is not None and file.size > size_limit:
             raise FileSizeError('The file size is too large (max {})'.format(File.pretty_size(size_limit)))
 
+    def upload_file(self, uploaded_file, file_hash, user):
+        '''Upload data from uploaded_file.'''
+        raise NotImplementedError()
+
     def add_file(self, file_hash, filename, size, user=None, ip=None, verify=True):
         '''Adds the file to the database.
 
@@ -29,6 +33,10 @@ class BaseStorage:
         '''Store uploaded_file.'''
         raise NotImplementedError()
 
+    def file_exists(self, f):
+        '''Return True if the specified file exists. '''
+        raise NotImplementedError()
+
     def get_file(self, f):
         '''Return a file object for the specified file.
 
diff --git a/fbin/file_storage/filesystem.py b/fbin/file_storage/filesystem.py
index 3a640bb..7951d88 100644
--- a/fbin/file_storage/filesystem.py
+++ b/fbin/file_storage/filesystem.py
@@ -10,7 +10,8 @@ class Storage(BaseStorage):
         os.makedirs(self.app.config['FILE_DIRECTORY'], exist_ok=True)
         os.makedirs(self.app.config['THUMB_DIRECTORY'], exist_ok=True)
 
-    def store_file(self, uploaded_file, file_hash, user, ip):
+    def upload_file(self, uploaded_file, file_hash, user):
+        '''Move the uploaded data into FILE_DIRECTORY and return (path, size).'''
         size = uploaded_file.content_length
         if hasattr(uploaded_file.stream, 'file'):
             temp = None
@@ -20,17 +21,27 @@ class Storage(BaseStorage):
             uploaded_file.save(temp.file)
             temp_path = temp.name
             size = os.path.getsize(temp_path)
+        new_path = os.path.join(self.app.config['FILE_DIRECTORY'], file_hash + os.path.splitext(uploaded_file.filename)[1])
+        os.rename(temp_path, new_path)
+        if self.app.config.get('DESTINATION_MODE'):
+            os.chmod(new_path, self.app.config.get('DESTINATION_MODE'))
+        return new_path, size
+
+    def store_file(self, uploaded_file, file_hash, user, ip):
+        '''Upload the file, then add it to the database; remove the stored file if that fails.'''
+        file_path, size = self.upload_file(uploaded_file, file_hash, user)
         try:
-            new_file = self.add_file(file_hash, uploaded_file.filename, size, user, ip)
-            if new_file:
-                os.rename(temp_path, new_file.get_path())
-                if self.app.config.get('DESTINATION_MODE'):
-                    os.chmod(new_file.get_path(), self.app.config.get('DESTINATION_MODE'))
-            return new_file
+            return self.add_file(file_hash, uploaded_file.filename, size, user, ip)
         except:
-            os.unlink(temp.name)
+            if os.path.exists(file_path):
+                os.unlink(file_path)
             raise
 
+    def file_exists(self, f):
+        '''Return True if the path of f exists on disk.'''
+        path = f.get_path()
+        return os.path.exists(path)
+
     def get_file(self, f):
         path = f.get_path()
         if not os.path.exists(path):
diff --git a/fbin/file_storage/s3.py b/fbin/file_storage/s3.py
index a11488f..e2dd1ea 100644
--- a/fbin/file_storage/s3.py
+++ b/fbin/file_storage/s3.py
@@ -21,7 +21,8 @@ class Storage(BaseStorage):
             key += '_thumb'
         return key
 
-    def store_file(self, uploaded_file, file_hash, user, ip):
+    def upload_file(self, uploaded_file, file_hash, user):
+        '''Upload the stream to the configured S3 bucket and return its size.'''
         bucket = self.client.Bucket(self.app.config['S3_BUCKET'])
         key = self._get_object_key(file_hash, user.id if user else 0)
         obj = bucket.upload_fileobj(Fileobj=uploaded_file.stream, Key=key)
@@ -29,12 +30,32 @@ class Storage(BaseStorage):
         if not size:
             obj = self.client.ObjectSummary(self.app.config['S3_BUCKET'], key)
             size = obj.size
+        return size
+
+    def store_file(self, uploaded_file, file_hash, user, ip):
+        '''Upload the file, then add it to the database; delete the uploaded object if that fails.'''
+        size = self.upload_file(uploaded_file, file_hash, user)
         try:
             return self.add_file(file_hash, uploaded_file.filename, size, user, ip)
         except:
-            obj.delete()
+            # The object handle from the upload is local to upload_file; look it up again by key.
+            key = self._get_object_key(file_hash, user.id if user else 0)
+            self.client.Object(self.app.config['S3_BUCKET'], key).delete()
             raise
 
+    def file_exists(self, f):
+        '''Return True if an object for f exists in the configured bucket.'''
+        key = self.get_object_key(f)
+        bucket = self.app.config['S3_BUCKET']
+        obj = self.client.Object(bucket, key)
+        try:
+            obj.load()
+            return True
+        except botocore.exceptions.ClientError as e:
+            if e.response['Error']['Code'] == '404':
+                return False
+            raise
+
     def get_file(self, f, thumb=False):
         key = self.get_object_key(f, thumb=thumb)
         if thumb: