From b72ecc321c315bafe40cc7406e87e088564ab8a9 Mon Sep 17 00:00:00 2001 From: Jon Bergli Heier Date: Wed, 24 Jul 2019 09:02:43 +0200 Subject: Add file storage modules Allows for storing files other places than the local file system. Currently the local filesystem and S3 are supported. --- fbin/db.py | 8 +++-- fbin/fbin.py | 69 ++++++++++++++++++----------------------- fbin/file_storage/base.py | 39 +++++++++++++++++++++++ fbin/file_storage/filesystem.py | 44 ++++++++++++++++++++++++++ fbin/file_storage/s3.py | 55 ++++++++++++++++++++++++++++++++ 5 files changed, 174 insertions(+), 41 deletions(-) create mode 100644 fbin/file_storage/base.py create mode 100644 fbin/file_storage/filesystem.py create mode 100644 fbin/file_storage/s3.py diff --git a/fbin/db.py b/fbin/db.py index 2bf153b..be79c76 100644 --- a/fbin/db.py +++ b/fbin/db.py @@ -4,7 +4,7 @@ import mimetypes import os from flask import current_app -from sqlalchemy import create_engine, Column, Integer, String, DateTime, Text, Index, ForeignKey, Boolean, JSON +from sqlalchemy import create_engine, Column, Integer, String, DateTime, Text, Index, ForeignKey, Boolean, JSON, BigInteger from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.orm import sessionmaker, relation, backref from sqlalchemy.orm.exc import NoResultFound @@ -48,6 +48,7 @@ class File(Base): id = Column(Integer, primary_key = True) hash = Column(String, unique = True, index = True) filename = Column(String) + size = Column(BigInteger) date = Column(DateTime) user_id = Column(Integer, ForeignKey('users.id'), nullable = True) ip = Column(String) @@ -55,9 +56,10 @@ class File(Base): scanned = Column(Boolean, nullable=False, default=False) blocked_reason = Column(JSON) - def __init__(self, hash, filename, date, user_id = None, ip = None): + def __init__(self, hash, filename, size, date, user_id = None, ip = None): self.hash = hash self.filename = filename + self.size = size self.date = date self.user_id = user_id self.ip = ip @@ -82,6 +84,8 @@ class File(Base): def get_size(self): try: + if self.size: + return self.size return os.path.getsize(self.get_path()) except OSError: return None diff --git a/fbin/fbin.py b/fbin/fbin.py index 19f82ed..d3065e2 100755 --- a/fbin/fbin.py +++ b/fbin/fbin.py @@ -4,6 +4,7 @@ import base64 import cgi import datetime import hashlib +import importlib import io import json import mimetypes @@ -14,7 +15,7 @@ import tempfile import urllib from urllib.parse import urlencode, urljoin -from flask import Blueprint, redirect, current_app, url_for, request, render_template, session, flash, send_file, abort, jsonify, Markup +from flask import Blueprint, redirect, current_app, url_for, request, render_template, session, flash, send_file, abort, jsonify, Markup, Response from flask_login import login_user, logout_user, current_user, login_required import jwt from PIL import Image @@ -25,13 +26,12 @@ from . import db from .monkey import patch as monkey_patch from .login import login_manager, load_user +storage = importlib.import_module(current_app.config.get('STORAGE_MODULE', '.file_storage.filesystem'), package='fbin').Storage(current_app) + monkey_patch() base62_alphabet = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' -if not os.path.isdir(current_app.config['FILE_DIRECTORY']): - os.mkdir(current_app.config['FILE_DIRECTORY']) - if not os.path.isdir(current_app.config['THUMB_DIRECTORY']): os.mkdir(current_app.config['THUMB_DIRECTORY']) @@ -57,19 +57,6 @@ def get_or_create_user(username, jab_id): except db.IntegrityError: return None -def add_file(path, filename, user = None, ip = None): - file_hash = ''.join(random.choice(base62_alphabet) for x in range(5)) - new_path = os.path.join(current_app.config['FILE_DIRECTORY'], file_hash + os.path.splitext(filename)[1]) - os.rename(path, new_path) - if current_app.config.get('DESTINATION_MODE'): - os.chmod(new_path, current_app.config.get('DESTINATION_MODE')) - with db.session_scope() as sess: - f = db.File(file_hash, filename, datetime.datetime.utcnow(), user.id if user else None, ip) - sess.add(f) - sess.commit() - sess.refresh(f) - return f - def get_file(file_hash, user_id=None, update_accessed=False): with db.session_scope() as sess: try: @@ -101,8 +88,7 @@ def delete_file(file): sess.delete(file) sess.commit() filename = file.get_path() - if os.path.exists(filename): - os.unlink(filename) + storage.delete_file(file) thumbfile = file.get_thumb_path() if os.path.exists(thumbfile): os.unlink(thumbfile) @@ -149,14 +135,8 @@ def upload(api=False, user=None): uploaded_file = request.files.get('file') if not uploaded_file or not uploaded_file.filename: return error('No valid file or filename was provided.') - if hasattr(uploaded_file.stream, 'file'): - temp = None - temp_path = uploaded_file.stream.name - else: - temp = tempfile.NamedTemporaryFile(prefix = 'upload_', dir = current_app.config['FILE_DIRECTORY'], delete = False) - uploaded_file.save(temp.file) - temp_path = temp.name - new_file = add_file(temp_path, uploaded_file.filename, user, request.remote_addr) + file_hash = ''.join(random.choice(base62_alphabet) for x in range(5)) + new_file = storage.store_file(uploaded_file, file_hash, user, request.remote_addr) mime = new_file.get_mime_type() # TODO: Apparently TIFF also supports EXIF, test this. @@ -207,9 +187,14 @@ def uploaded(hash): @app.route('/file//', endpoint = 'file') def _file(hash, ext=None, filename=None): f = get_file(hash) - if not f or not f.exists or f.blocked_reason: + if not f or f.blocked_reason: + abort(404) + path = storage.get_file(f) + if isinstance(path, Response): + return path + if not path or not os.path.exists(path): abort(404) - return send_file(f.get_path()) + return send_file(path) @app.route('/l') @app.route('/login') @@ -302,7 +287,7 @@ def files(): context = { 'title': 'Files', 'files': files, - 'total_size': db.File.pretty_size(sum(f.get_size() for f in files if f.exists)), + 'total_size': db.File.pretty_size(sum(f.size for f in files if f.size)), } return render_template('files.html', **context) @@ -354,7 +339,7 @@ def images(): 'title': 'Images', 'fullwidth': True, 'files': files, - 'total_size': db.File.pretty_size(sum(f.get_size() for f in files if f.exists)), + 'total_size': db.File.pretty_size(sum(f.size for f in files if f.size)), } return render_template('images.html', **context) @@ -367,7 +352,7 @@ def videos(): 'title': 'Videos', 'fullwidth': True, 'files': files, - 'total_size': db.File.pretty_size(sum(f.get_size() for f in files if f.exists)), + 'total_size': db.File.pretty_size(sum(f.size for f in files if f.size)), } return render_template('images.html', **context) @@ -379,18 +364,24 @@ def thumb(hash): f = get_file(hash, update_accessed = False) if f.is_image(): try: - im = Image.open(f.get_path()) + #im = Image.open(f.get_path()) + with storage.temp_file(f) as tf: + im = Image.open(tf) + # Check for valid JPEG modes. + if im.mode not in ('1', 'L', 'RGB', 'RGBX', 'CMYK', 'YCbCr'): + im = im.convert('RGB') + im.thumbnail(current_app.config.get('THUMB_SIZE', (128, 128)), Image.ANTIALIAS) + im.save(thumbfile) except IOError: # We can't generate a thumbnail for this file, just say it doesn't exist. abort(404) - # Check for valid JPEG modes. - if im.mode not in ('1', 'L', 'RGB', 'RGBX', 'CMYK', 'YCbCr'): - im = im.convert('RGB') - im.thumbnail(current_app.config.get('THUMB_SIZE', (128, 128)), Image.ANTIALIAS) - im.save(thumbfile) elif f.is_video(): - p = subprocess.run(['ffmpegthumbnailer', '-i', f.get_path(), '-o', thumbfile]) + #p = subprocess.run(['ffmpegthumbnailer', '-i', f.get_path(), '-o', thumbfile]) + with storage.temp_file(f) as tf: + p = subprocess.run(['ffmpegthumbnailer', '-i', '-', '-o', thumbfile], stdin=tf) if p.returncode != 0: + if os.path.exists(thumbfile): + os.unlink(thumbfile) abort(404) else: abort(404) diff --git a/fbin/file_storage/base.py b/fbin/file_storage/base.py new file mode 100644 index 0000000..6f39665 --- /dev/null +++ b/fbin/file_storage/base.py @@ -0,0 +1,39 @@ +import datetime + +from .. import db + +class BaseStorage: + def __init__(self, app): + self.app = app + + def add_file(self, file_hash, filename, size, user=None, ip=None): + '''Adds the file to the database. + + Call from store_file after the file is successfully stored.''' + with db.session_scope() as sess: + f = db.File(file_hash, filename, size, datetime.datetime.utcnow(), user.id if user else None, ip) + sess.add(f) + sess.commit() + sess.refresh(f) + return f + + def store_file(self, uploaded_file, file_hash, filename, user, ip): + '''Store uploaded_file.''' + raise NotImplementedError() + + def get_file(self, f): + '''Return a file object for the specified file. + + Subclasses can also return a flask.Response instance if required.''' + raise NotImplementedError() + + def delete_file(self, f): + '''Delete the specified file.''' + raise NotImplementedError() + + def temp_file(self, f): + '''Context manager which returns a temporary file for reading. + + This is used internally for eg. thumbnails.''' + raise NotImplementedError() + diff --git a/fbin/file_storage/filesystem.py b/fbin/file_storage/filesystem.py new file mode 100644 index 0000000..3433baf --- /dev/null +++ b/fbin/file_storage/filesystem.py @@ -0,0 +1,44 @@ +import contextlib +import os +import tempfile + +from .base import BaseStorage + +class Storage(BaseStorage): + def __init__(self, app): + super().__init__(app) + os.makedirs(self.app.config['FILE_DIRECTORY'], exist_ok=True) + + def store_file(self, uploaded_file, file_hash, user, ip): + size = uploaded_file.content_length + if hasattr(uploaded_file.stream, 'file'): + temp = None + temp_path = uploaded_file.stream.name + else: + temp = tempfile.NamedTemporaryFile(prefix='upload_', dir=self.app.config['FILE_DIRECTORY'], delete=False) + uploaded_file.save(temp.file) + temp_path = temp.name + size = os.path.getsize(temp_path) + try: + new_file = self.add_file(file_hash, uploaded_file.filename, size, user, ip) + os.rename(temp_path, new_file.get_path()) + return new_file + except: + os.unlink(temp.name) + raise + + def get_file(self, f): + path = f.get_path() + if not os.path.exists(path): + return + return path + + def delete_file(self, f): + path = f.get_path() + if os.path.exists(path): + os.unlink(path) + + @contextlib.contextmanager + def temp_file(self, f): + with open(f.get_path(), 'rb') as f: + yield f diff --git a/fbin/file_storage/s3.py b/fbin/file_storage/s3.py new file mode 100644 index 0000000..2f0b87b --- /dev/null +++ b/fbin/file_storage/s3.py @@ -0,0 +1,55 @@ +import contextlib +import tempfile + +import boto3 +from flask import request, send_file + +from .base import BaseStorage + +class Storage(BaseStorage): + def __init__(self, app): + super().__init__(app) + self.client = boto3.resource('s3', **self.app.config['S3_CONFIG']) + + def _get_object_key(self, file_hash, user_id): + return '{}_{}'.format(file_hash, user_id) + + def get_object_key(self, f): + return self._get_object_key(f.hash, f.user_id if f.user_id else 0) + + def store_file(self, uploaded_file, file_hash, user, ip): + bucket = self.client.Bucket(self.app.config['S3_BUCKET']) + key = self._get_object_key(file_hash, user.id if user else 0) + obj = bucket.upload_fileobj(Fileobj=uploaded_file.stream, Key=key) + size = uploaded_file.content_length + if not size: + obj = self.client.ObjectSummary(self.app.config['S3_BUCKET'], key) + size = obj.size + return self.add_file(file_hash, uploaded_file.filename, size, user, ip) + + def get_file(self, f): + obj = self.client.Object(self.app.config['S3_BUCKET'], self.get_object_key(f)) + kwargs = {} + if 'Range' in request.headers: + kwargs['Range'] = request.headers['Range'] + data = obj.get(**kwargs) + rv = send_file(data['Body'], attachment_filename=f.filename) + rv.headers['Content-Length'] = data['ContentLength'] + rv.headers['Accept-Ranges'] = data['AcceptRanges'] + if 'ContentRange' in data: + rv.headers['Content-Range'] = data['ContentRange'] + rv.status_code = 206 + return rv + + def delete_file(self, f): + obj = self.client.Object(self.app.config['S3_BUCKET'], self.get_object_key(f)) + obj.delete() + + @contextlib.contextmanager + def temp_file(self, f): + obj = self.client.Object(self.app.config['S3_BUCKET'], self.get_object_key(f)) + with tempfile.NamedTemporaryFile() as f: + obj.download_fileobj(f) + f.seek(0) + yield f + -- cgit v1.2.3