From 608fa9690b6961b237b9e38fc6ec7c0916f92d1b Mon Sep 17 00:00:00 2001
From: Jon Bergli Heier
Date: Tue, 2 Apr 2019 20:45:55 +0200
Subject: Add support for blocking files

Files are blocked if blocked_reason is non-NULL. This value is currently
not exposed publicly, instead a 404 will be returned.

Files are scanned using virustotal.com's public API if scanned is False.
Scans are performed by the fbin-scanner.py script. If a match is found,
blocked_reason is set to the payload received. Files that are not in VT's
database will be automatically submitted and the script will wait for the
scan to complete before continuing.
---
Review note: fbin-scanner.py was amended during review (upload handle is
closed, bare except narrowed, stray break after commit removed). The blob
id on its index line still refers to the originally submitted file.

 fbin-scanner.py | 140 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 fbin/db.py      |   4 +-
 fbin/fbin.py    |   2 +-
 3 files changed, 144 insertions(+), 2 deletions(-)
 create mode 100644 fbin-scanner.py

diff --git a/fbin-scanner.py b/fbin-scanner.py
new file mode 100644
index 0000000..d520680
--- /dev/null
+++ b/fbin-scanner.py
@@ -0,0 +1,140 @@
+#!/usr/bin/env python
+"""Scan unscanned fbin files with VirusTotal and block positive matches."""
+
+import argparse
+from collections import deque
+import fcntl
+import hashlib
+import logging
+import os
+import sys
+import time
+
+from flask import Flask
+import requests
+
+# Seconds to sleep between files and between polls for a pending report.
+FILE_DELAY = 15
+SCAN_DELAY = 60
+
+LOGGING_CONFIG = {
+    'level': logging.INFO,
+    'style': '{',
+    'format': '{asctime} [{levelname}] {message}',
+}
+
+logging.basicConfig(**LOGGING_CONFIG)
+
+logger = logging.getLogger('fbin-scanner')
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--lock-file', default='/tmp/fbin-scanner.lock')
+parser.add_argument('-c', '--config-file', default='fbin/fbin.cfg')
+args = parser.parse_args()
+
+# Take an exclusive, non-blocking lock so only one scanner runs at a time.
+lock_file = open(args.lock_file, 'w')
+try:
+    fcntl.flock(lock_file, fcntl.LOCK_EX | fcntl.LOCK_NB)
+except OSError:
+    logger.error('Cannot acquire lock; another process is running')
+    sys.exit(1)
+
+def get_report(dbfile, digest):
+    """Fetch the VirusTotal report for a file, submitting it if unknown.
+
+    dbfile is the File row being scanned and digest its SHA-256 hex digest.
+    Returns the decoded report once response_code is 1, or None if the
+    submission is rejected or the response code is not recognised. HTTP
+    errors propagate from raise_for_status().
+    """
+    logger.info('Fetching file report')
+    params = {
+        'apikey': app.config['VIRUSTOTAL_API_KEY'],
+        'resource': digest,
+    }
+    response = requests.get('https://www.virustotal.com/vtapi/v2/file/report', params=params)
+    response.raise_for_status()
+    data = response.json()
+    # Report found
+    if data['response_code'] == 1:
+        return data
+    scan_id = None
+    # No report, submit file for scan
+    if data['response_code'] == 0:
+        logger.info('No report, submitting file')
+        # Close the upload handle once the request has completed.
+        with open(dbfile.get_path(), 'rb') as f:
+            response = requests.post('https://www.virustotal.com/vtapi/v2/file/scan',
+                params={'apikey': app.config['VIRUSTOTAL_API_KEY']},
+                files={'file': (dbfile.filename, f)},
+            )
+        response.raise_for_status()
+        data = response.json()
+        if data['response_code'] != 1:
+            logger.error('Scan failed')
+            return
+        scan_id = data['scan_id']
+        logger.info('File submitted with scan_id %s, waiting for scan report', scan_id)
+        params['resource'] = scan_id
+    # File was submitted or is queued for scan
+    if scan_id or data['response_code'] == -2:
+        # TODO: consider adding a timeout here
+        while True:
+            time.sleep(SCAN_DELAY)
+            response = requests.get('https://www.virustotal.com/vtapi/v2/file/report', params=params)
+            response.raise_for_status()
+            data = response.json()
+            if data['response_code'] == 1:
+                return data
+    logger.warning('Unknown response: %s', data)
+
+def main():
+    """Scan every unscanned file and store the verdict on its row."""
+    with session_scope() as session:
+        files = deque(session.query(File).filter(File.scanned == False, File.filename != 'ZwbZh6o.gif').all())
+        while files:
+            dbfile = files.pop()
+            if not dbfile.exists:
+                logger.warning('Ignoring missing file %s', dbfile.get_path())
+                continue
+            if dbfile.get_size() > 2**20*32:
+                logger.info('Ignoring file %s due to size (%s)', dbfile.get_path(), dbfile.formatted_size)
+                continue
+            logger.info('Checking file %s (%s)', dbfile.get_path(), dbfile.formatted_size)
+            h = hashlib.sha256()
+            with open(dbfile.get_path(), 'rb') as f:
+                chunk = f.read(2**10*16)
+                while chunk:
+                    h.update(chunk)
+                    chunk = f.read(2**10*16)
+            digest = h.hexdigest()
+            logger.info('SHA-256: %s', digest)
+            try:
+                report = get_report(dbfile, digest)
+            except Exception:
+                logger.exception('Failed to get report for %s', dbfile.get_path())
+                # Most likely an error from virustotal, so just break here and retry later.
+                break
+            dbfile.scanned = True
+            if report and any(r.get('detected', False) for r in report['scans'].values()):
+                logger.warning('Positive match')
+                dbfile.blocked_reason = report
+            else:
+                logger.info('No match')
+            session.add(dbfile)
+            session.commit()
+            # Pause before the next file instead of stopping after the first one.
+            time.sleep(FILE_DELAY)
+        logger.info('No more files to scan')
+
+app = Flask('scanner')
+with app.app_context():
+    app.config.from_pyfile(args.config_file)
+    from fbin.db import session_scope, File
+    config = app.config
+    main()
+
+fcntl.flock(lock_file, fcntl.LOCK_UN)
+lock_file.close()
+os.unlink(args.lock_file)
diff --git a/fbin/db.py b/fbin/db.py
index 58bf8ab..2bf153b 100644
--- a/fbin/db.py
+++ b/fbin/db.py
@@ -4,7 +4,7 @@ import mimetypes
 import os
 
 from flask import current_app
-from sqlalchemy import create_engine, Column, Integer, String, DateTime, Text, Index, ForeignKey, Boolean
+from sqlalchemy import create_engine, Column, Integer, String, DateTime, Text, Index, ForeignKey, Boolean, JSON
 from sqlalchemy.ext.declarative import declarative_base
 from sqlalchemy.orm import sessionmaker, relation, backref
 from sqlalchemy.orm.exc import NoResultFound
@@ -52,6 +52,8 @@ class File(Base):
     user_id = Column(Integer, ForeignKey('users.id'), nullable = True)
     ip = Column(String)
     accessed = Column(DateTime)
+    scanned = Column(Boolean, nullable=False, default=False)
+    blocked_reason = Column(JSON)
 
     def __init__(self, hash, filename, date, user_id = None, ip = None):
         self.hash = hash
diff --git a/fbin/fbin.py b/fbin/fbin.py
index 91fa1c9..19f82ed 100755
--- a/fbin/fbin.py
+++ b/fbin/fbin.py
@@ -207,7 +207,7 @@ def uploaded(hash):
 @app.route('/file//', endpoint = 'file')
 def _file(hash, ext=None, filename=None):
     f = get_file(hash)
-    if not f or not f.exists:
+    if not f or not f.exists or f.blocked_reason:
         abort(404)
     return send_file(f.get_path())
 
-- 
cgit v1.2.3