#!/usr/bin/env python
"""Scan files that are not yet marked as scanned using the VirusTotal v2 API.

The script takes an exclusive, non-blocking lock on a lock file so that only
one instance runs at a time, then walks over every ``File`` row whose
``scanned`` flag is false, looks up (or requests) a VirusTotal report for it
and stores the outcome on the row.
"""
import argparse
from collections import deque
import fcntl
import hashlib
import importlib
import logging
import os
import sys
import time

from flask import Flask, current_app
import requests

# Seconds to sleep between two files.
FILE_DELAY = 15
# Seconds to sleep between two polls for a pending scan report.
SCAN_DELAY = 60
# Seconds before a single HTTP request to VirusTotal is abandoned. Without a
# timeout, requests waits forever on a stalled connection.
REQUEST_TIMEOUT = 120
# Files larger than this many bytes are skipped.
# NOTE(review): presumably the VirusTotal upload limit -- confirm.
MAX_FILE_SIZE = 32 * 10**6
# Number of bytes read per iteration while hashing a file.
HASH_CHUNK_SIZE = 16 * 2**10

REPORT_URL = 'https://www.virustotal.com/vtapi/v2/file/report'
SCAN_URL = 'https://www.virustotal.com/vtapi/v2/file/scan'

LOGGING_CONFIG = {
    'level': logging.INFO,
    'style': '{',
    'format': '{asctime} [{levelname}] {message}',
}

logging.basicConfig(**LOGGING_CONFIG)
logger = logging.getLogger('fbin-scanner')

parser = argparse.ArgumentParser()
parser.add_argument('--lock-file', default='/tmp/fbin-scanner.lock')
parser.add_argument('-c', '--config-file', default='fbin/fbin.cfg')
args = parser.parse_args()

# The handle is kept open for the lifetime of the process: the flock is tied
# to it and is released at the bottom of the script.
lock_file = open(args.lock_file, 'w')
try:
    fcntl.flock(lock_file, fcntl.LOCK_EX | fcntl.LOCK_NB)
except OSError:
    logger.error('Cannot acquire lock; another process is running')
    sys.exit(1)


def get_report(dbfile, digest, fileobj):
    """Fetch the VirusTotal report for a file, submitting the file if needed.

    Must be called inside a Flask application context; the API key is read
    from ``current_app.config['VIRUSTOTAL_API_KEY']``.

    Args:
        dbfile: Object whose ``filename`` attribute is used as the upload name.
        digest: Hash of the file, used as the ``resource`` of the first lookup.
        fileobj: Open file object that is uploaded when no report exists.

    Returns:
        The decoded JSON report once its ``response_code`` is 1, or ``None``
        when the submission is rejected or the response code is not
        recognised.

    Raises:
        requests.RequestException: On HTTP errors (via ``raise_for_status``),
            connection failures or timeouts.
    """
    logger.info('Fetching file report')
    api_key = current_app.config['VIRUSTOTAL_API_KEY']
    params = {
        'apikey': api_key,
        'resource': digest,
    }
    response = requests.get(REPORT_URL, params=params, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    data = response.json()

    # Report found
    if data['response_code'] == 1:
        return data

    scan_id = None
    # No report, submit file for scan
    if data['response_code'] == 0:
        logger.info('No report, submitting file')
        response = requests.post(
            SCAN_URL,
            params={'apikey': api_key},
            files={'file': (dbfile.filename, fileobj)},
            timeout=REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        data = response.json()
        if data['response_code'] != 1:
            logger.error('Scan failed')
            return None
        scan_id = data['scan_id']
        logger.info('File submitted with scan_id %s, waiting for scan report', scan_id)
        # From here on the report is looked up by scan id instead of digest.
        params['resource'] = scan_id

    # File was submitted or is queued for scan
    if scan_id or data['response_code'] == -2:
        # TODO: consider adding a timeout here
        while True:
            time.sleep(SCAN_DELAY)
            response = requests.get(REPORT_URL, params=params, timeout=REQUEST_TIMEOUT)
            response.raise_for_status()
            data = response.json()
            if data['response_code'] == 1:
                return data

    logger.warning('Unknown response: %s', data)
    return None


def main():
    """Scan every file that is not marked as scanned and record the result.

    Must be called inside a Flask application context. Files without a size
    or larger than ``MAX_FILE_SIZE`` are skipped. Processing stops at the
    first file whose report cannot be fetched, so that it is retried on the
    next run.
    """
    storage_module = importlib.import_module(
        current_app.config.get('STORAGE_MODULE', 'fbin.file_storage.filesystem'))
    storage = storage_module.Storage(current_app)

    with session_scope() as session:
        # "== False" is deliberate: it builds an SQL expression, not a Python
        # boolean.
        files = deque(session.query(File).filter(File.scanned == False).all())  # noqa: E712
        while files:
            dbfile = files.pop()
            size = dbfile.get_size()
            if not size:
                logger.info('Ignoring file %s/%s due to unknown size', dbfile.filename, dbfile.hash)
                continue
            if size > MAX_FILE_SIZE:
                logger.info('Ignoring file %s/%s due to size (%s)',
                            dbfile.filename, dbfile.hash, dbfile.formatted_size)
                continue

            logger.info('Checking file %s/%s (%s)',
                        dbfile.filename, dbfile.hash, dbfile.formatted_size)
            try:
                with storage.temp_file(dbfile) as f:
                    h = hashlib.sha256()
                    chunk = f.read(HASH_CHUNK_SIZE)
                    while chunk:
                        h.update(chunk)
                        chunk = f.read(HASH_CHUNK_SIZE)
                    # Rewind so that the complete file is uploaded if a scan
                    # has to be requested.
                    f.seek(0)
                    digest = h.hexdigest()
                    logger.info('SHA-256: %s', digest)
                    report = get_report(dbfile, digest, f)
            except Exception:
                # Exception rather than a bare except, so that Ctrl-C and
                # SystemExit still terminate the script.
                logger.exception('Failed to get report for %s/%s', dbfile.filename, dbfile.hash)
                # Most likely an error from virustotal, so just break here and retry later.
                break

            dbfile.scanned = True
            if report and any(r.get('detected', False) for r in report['scans'].values()):
                logger.warning('Positive match')
                dbfile.blocked_reason = report
            else:
                logger.info('No match')
            session.add(dbfile)
            session.commit()
            time.sleep(FILE_DELAY)

    logger.info('No more files to scan')


app = Flask('scanner')
try:
    with app.app_context():
        app.config.from_pyfile(args.config_file)
        # Imported only after the configuration has been loaded.
        # NOTE(review): presumably fbin.db needs the app context/config at
        # import time -- confirm.
        from fbin.db import session_scope, File
        config = app.config
        main()
finally:
    # Release the lock and remove the lock file even when main() fails.
    fcntl.flock(lock_file, fcntl.LOCK_UN)
    lock_file.close()
    os.unlink(args.lock_file)