diff options
| -rw-r--r-- | fbin-scanner.py | 140 |
| -rw-r--r-- | fbin/db.py | 4 |
| -rwxr-xr-x | fbin/fbin.py | 2 |
3 files changed, 144 insertions, 2 deletions
diff --git a/fbin-scanner.py b/fbin-scanner.py
new file mode 100644
index 0000000..d520680
--- /dev/null
+++ b/fbin-scanner.py
@@ -0,0 +1,140 @@
+#!/usr/bin/env python
+
+import argparse
+from collections import deque
+import fcntl
+import hashlib
+import logging
+import os
+import sys
+import time
+
+from flask import Flask
+import requests
+
+FILE_DELAY = 15
+SCAN_DELAY = 60
+# Seconds to wait on a single VirusTotal HTTP request before giving up.
+REQUEST_TIMEOUT = 60
+
+LOGGING_CONFIG = {
+    'level': logging.INFO,
+    'style': '{',
+    'format': '{asctime} [{levelname}] {message}',
+}
+
+logging.basicConfig(**LOGGING_CONFIG)
+
+logger = logging.getLogger('fbin-scanner')
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--lock-file', default='/tmp/fbin-scanner.lock')
+parser.add_argument('-c', '--config-file', default='fbin/fbin.cfg')
+args = parser.parse_args()
+
+lock_file = open(args.lock_file, 'w')
+try:
+    fcntl.flock(lock_file, fcntl.LOCK_EX | fcntl.LOCK_NB)
+except OSError:
+    logger.error('Cannot acquire lock; another process is running')
+    sys.exit(1)
+
+def get_report(dbfile, digest):
+    """Return the VirusTotal report for dbfile, or None if none was obtained.
+
+    The report is looked up by digest; when VirusTotal has none, the file is
+    uploaded and the report is polled for every SCAN_DELAY seconds.
+    Exceptions raised by requests (HTTP errors, timeouts) propagate.
+    """
+    logger.info('Fetching file report')
+    params = {
+        'apikey': app.config['VIRUSTOTAL_API_KEY'],
+        'resource': digest,
+    }
+    response = requests.get('https://www.virustotal.com/vtapi/v2/file/report',
+            params=params, timeout=REQUEST_TIMEOUT)
+    response.raise_for_status()
+    data = response.json()
+    # Report found
+    if data['response_code'] == 1:
+        return data
+    scan_id = None
+    # No report, submit file for scan
+    if data['response_code'] == 0:
+        logger.info('No report, submitting file')
+        # Close the upload handle as soon as the request has completed.
+        with open(dbfile.get_path(), 'rb') as upload:
+            response = requests.post('https://www.virustotal.com/vtapi/v2/file/scan',
+                    params={'apikey': app.config['VIRUSTOTAL_API_KEY']},
+                    files={'file': (dbfile.filename, upload)},
+                    timeout=REQUEST_TIMEOUT,
+            )
+        response.raise_for_status()
+        data = response.json()
+        if data['response_code'] != 1:
+            logger.error('Scan failed')
+            return
+        scan_id = data['scan_id']
+        logger.info('File submitted with scan_id %s, waiting for scan report', scan_id)
+        params['resource'] = scan_id
+    # File was submitted or is queued for scan
+    if scan_id or data['response_code'] == -2:
+        # TODO: consider adding a timeout here
+        while True:
+            time.sleep(SCAN_DELAY)
+            response = requests.get('https://www.virustotal.com/vtapi/v2/file/report',
+                    params=params, timeout=REQUEST_TIMEOUT)
+            response.raise_for_status()
+            data = response.json()
+            if data['response_code'] == 1:
+                return data
+    logger.warning('Unknown response: %s', data)
+
+def main():
+    """Scan each unscanned file in turn and record the outcome in the database."""
+    with session_scope() as session:
+        files = deque(session.query(File).filter(File.scanned == False, File.filename != 'ZwbZh6o.gif').all())
+        while files:
+            dbfile = files.pop()
+            if not dbfile.exists:
+                logger.warning('Ignoring missing file %s', dbfile.get_path())
+                continue
+            if dbfile.get_size() > 2**20*32:
+                logger.info('Ignoring file %s due to size (%s)', dbfile.get_path(), dbfile.formatted_size)
+                continue
+            logger.info('Checking file %s (%s)', dbfile.get_path(), dbfile.formatted_size)
+            h = hashlib.sha256()
+            with open(dbfile.get_path(), 'rb') as f:
+                for chunk in iter(lambda: f.read(2**10*16), b''):
+                    h.update(chunk)
+            digest = h.hexdigest()
+            logger.info('SHA-256: %s', digest)
+            try:
+                report = get_report(dbfile, digest)
+            except Exception:
+                logger.exception('Failed to get report for %s', dbfile.get_path())
+                # Most likely an error from virustotal, so just break here and retry later.
+                break
+            dbfile.scanned = True
+            if report and any(r.get('detected', False) for r in report['scans'].values()):
+                logger.warning('Positive match')
+                dbfile.blocked_reason = report
+            else:
+                logger.info('No match')
+            session.add(dbfile)
+            session.commit()
+            # Space out lookups between files; no need to wait after the last one.
+            if files:
+                time.sleep(FILE_DELAY)
+        logger.info('No more files to scan')
+
+app = Flask('scanner')
+with app.app_context():
+    app.config.from_pyfile(args.config_file)
+    from fbin.db import session_scope, File
+    config = app.config
+    main()
+
+fcntl.flock(lock_file, fcntl.LOCK_UN)
+lock_file.close()
+os.unlink(args.lock_file)
diff --git a/fbin/db.py b/fbin/db.py
--- a/fbin/db.py
+++ b/fbin/db.py
@@ -4,7 +4,7 @@ import mimetypes
 import os
 
 from flask import current_app
-from sqlalchemy import create_engine, Column, Integer, String, DateTime, Text, Index, ForeignKey, Boolean
+from sqlalchemy import create_engine, Column, Integer, String, DateTime, Text, Index, ForeignKey, Boolean, JSON
 from sqlalchemy.ext.declarative import declarative_base
 from sqlalchemy.orm import sessionmaker, relation, backref
 from sqlalchemy.orm.exc import NoResultFound
@@ -52,6 +52,8 @@ class File(Base):
     user_id = Column(Integer, ForeignKey('users.id'), nullable = True)
     ip = Column(String)
     accessed = Column(DateTime)
+    scanned = Column(Boolean, nullable=False, default=False)
+    blocked_reason = Column(JSON)
 
     def __init__(self, hash, filename, date, user_id = None, ip = None):
         self.hash = hash
diff --git a/fbin/fbin.py b/fbin/fbin.py
index 91fa1c9..19f82ed 100755
--- a/fbin/fbin.py
+++ b/fbin/fbin.py
@@ -207,7 +207,7 @@ def uploaded(hash):
 @app.route('/file/<hash:hash>/<path:filename>', endpoint = 'file')
 def _file(hash, ext=None, filename=None):
     f = get_file(hash)
-    if not f or not f.exists:
+    if not f or not f.exists or f.blocked_reason:
         abort(404)
     return send_file(f.get_path())
