From 026dd78098b24b206518357365d6f22c3e5715b4 Mon Sep 17 00:00:00 2001 From: Jon Bergli Heier Date: Tue, 28 Aug 2012 23:04:20 +0200 Subject: Write temporary files to settings.file_directory on upload. We now only use one temporary file instead of two, and this file is renamed to the destination filename on completion. This should speed up things by avoiding copying files from tmpfs to disk. --- fbin.py | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/fbin.py b/fbin.py index 8a77738..bd025c0 100755 --- a/fbin.py +++ b/fbin.py @@ -1,7 +1,7 @@ #!/usr/bin/env python2 import templates -import settings, db, os, random, datetime, shutil, mimetypes, cgi, tempfile, hashlib, Cookie, urllib, subprocess +import settings, db, os, random, datetime, mimetypes, cgi, tempfile, hashlib, Cookie, urllib, subprocess from PIL import Image base62_alphabet = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' @@ -22,6 +22,11 @@ except OSError: else: has_mogrify = True +class FileUploadFieldStorage(cgi.FieldStorage): + def make_file(self, binary = None): + # Make a temporary file in the destination directory, which will be renamed on completion. + return tempfile.NamedTemporaryFile(prefix = 'upload_', dir = settings.file_directory, delete = False) + class Application(object): def get_user(self, username, password): session = db.Session() @@ -88,7 +93,7 @@ class Application(object): def add_file(self, path, filename, file_hash, user = None): hash = ''.join(random.choice(base62_alphabet) for x in xrange(5)) new_path = os.path.join(settings.file_directory, hash + os.path.splitext(filename)[1]) - shutil.copyfile(path, new_path) + os.rename(path, new_path) session = db.Session() try: @@ -227,23 +232,21 @@ class Application(object): def upload(self, environ, start_response, path): c = Cookie.SimpleCookie(environ['HTTP_COOKIE'] if 'HTTP_COOKIE' in environ else None) user = self.validate_cookie(c) - form = cgi.FieldStorage(fp = environ['wsgi.input'], environ = environ) + tempfile.tempdir = settings.file_directory + form = FileUploadFieldStorage(fp = environ['wsgi.input'], environ = environ) if environ['REQUEST_METHOD'] != 'POST' or not 'file' in form or not 'filename' in form: start_response('200 OK', [('Content-Type', 'text/html')]) return str(templates.upload(searchList = {'root': settings.virtual_root, 'user': user})) filename = form.getvalue('filename') - - temp = tempfile.NamedTemporaryFile(mode = 'wb', prefix = 'fbin', delete = True) - f = form['file'].file + temp = form['file'].file m = hashlib.md5() - s = f.read(128) + s = temp.read(128) while len(s): m.update(s) - temp.write(s) - s = f.read(128) - temp.flush() + s = temp.read(128) + temp.close() file_hash = m.hexdigest() f = self.get_file_by_file_hash(file_hash) @@ -251,11 +254,14 @@ class Application(object): if f: hash = f.hash else: + # temp.name will be moved to the destination filename hash = self.add_file(temp.name, filename, file_hash, user) # This avoids silly "not bound to a Session" errors when trying to use a newly added file object. f = self.get_file(hash) - temp.close() + # If temp.name still exists, we most likely uploaded a file whose file hash already exists, so just delete the file. + if os.path.exists(temp.name): + os.unlink(temp.name) mime = f.get_mime_type() # TODO: Apparently TIFF also supports EXIF, test this. -- cgit v1.2.3