#!/usr/bin/env python
"""Check unscanned fbin files against VirusTotal and flag detected ones.

Without ``--poll`` the script makes a single pass over every file whose
``scanned`` flag is false.  With ``--poll`` it keeps running and re-checks
periodically.  An exclusive, non-blocking ``flock`` on ``--lock-file`` makes
sure only one instance runs at a time.
"""
import argparse
from collections import deque
import datetime
import fcntl
import hashlib
import importlib
import logging
import os
import sys
import time

from flask import Flask, current_app
import requests

# Seconds slept after each file that was sent to VirusTotal.
FILE_DELAY = 15
# Seconds slept between two report polls while waiting for a scan to finish.
SCAN_DELAY = 60
# Seconds slept between two passes of the poll loop.
POLL_DELAY = 60
# Timeout (seconds) handed to every ``requests`` call.  Without one, requests
# waits indefinitely on a stalled connection and the scanner would hang while
# still holding its lock.
REQUEST_TIMEOUT = 120
# Files larger than this many bytes are skipped.
MAX_FILE_SIZE = 32 * 10**6
# Read size used while hashing a file.
HASH_CHUNK_SIZE = 2**10 * 16

VIRUSTOTAL_REPORT_URL = 'https://www.virustotal.com/vtapi/v2/file/report'
VIRUSTOTAL_SCAN_URL = 'https://www.virustotal.com/vtapi/v2/file/scan'

LOGGING_CONFIG = {
    'level': logging.INFO,
    'style': '{',
    'format': '{asctime} [{levelname}] {message}',
}
logging.basicConfig(**LOGGING_CONFIG)
logger = logging.getLogger('fbin-scanner')

parser = argparse.ArgumentParser()
parser.add_argument('--lock-file', default='/tmp/fbin-scanner.lock')
parser.add_argument('-c', '--config-file', default='fbin/fbin.cfg')
parser.add_argument('-p', '--poll', action='store_true')
args = parser.parse_args()

lock_file = open(args.lock_file, 'w')
try:
    fcntl.flock(lock_file, fcntl.LOCK_EX | fcntl.LOCK_NB)
except OSError:
    logger.error('Cannot acquire lock; another process is running')
    sys.exit(1)


def _fetch_report(params):
    """GET the VirusTotal file report for ``params`` and return the decoded JSON."""
    response = requests.get(VIRUSTOTAL_REPORT_URL, params=params, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.json()


def get_report(dbfile, digest, fileobj):
    """Return the VirusTotal report for a file, submitting it for a scan if needed.

    The report is first looked up by ``digest``.  If VirusTotal has no report
    (``response_code == 0``) the content of ``fileobj`` is uploaded under the
    name ``dbfile.filename`` and the report is then polled for by scan id every
    ``SCAN_DELAY`` seconds.  A ``response_code`` of ``-2`` on the first lookup
    is polled for in the same way.

    Must be called inside a Flask application context, because the API key is
    read from ``current_app.config['VIRUSTOTAL_API_KEY']``.

    Returns:
        The decoded report (a response with ``response_code == 1``), or
        ``None`` when the upload was rejected or the first lookup returned an
        unrecognised response code.

    Raises:
        requests.RequestException: on HTTP errors (``raise_for_status``),
            timeouts, or connection problems.
    """
    logger.info('Fetching file report')
    api_key = current_app.config['VIRUSTOTAL_API_KEY']
    params = {
        'apikey': api_key,
        'resource': digest,
    }
    data = _fetch_report(params)

    # Report found
    if data['response_code'] == 1:
        return data

    scan_id = None
    # No report, submit file for scan
    if data['response_code'] == 0:
        logger.info('No report, submitting file')
        response = requests.post(
            VIRUSTOTAL_SCAN_URL,
            params={'apikey': api_key},
            files={'file': (dbfile.filename, fileobj)},
            timeout=REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        data = response.json()
        if data['response_code'] != 1:
            logger.error('Scan failed')
            return None
        scan_id = data['scan_id']
        logger.info('File submitted with scan_id %s, waiting for scan report', scan_id)
        # From here on the report is looked up by scan id instead of digest.
        params['resource'] = scan_id

    # File was submitted or is queued for scan
    if scan_id or data['response_code'] == -2:
        # TODO: consider adding a timeout here
        while True:
            time.sleep(SCAN_DELAY)
            data = _fetch_report(params)
            if data['response_code'] == 1:
                return data

    logger.warning('Unknown response: %s', data)
    return None


def _sha256_hexdigest(fileobj):
    """Return the SHA-256 hex digest of ``fileobj``, read in chunks from its current position."""
    hasher = hashlib.sha256()
    for chunk in iter(lambda: fileobj.read(HASH_CHUNK_SIZE), b''):
        hasher.update(chunk)
    return hasher.hexdigest()


def process_files(filters=None):
    """Scan every ``File`` row matching ``filters`` and persist the outcome.

    Files with a falsy size or a size above ``MAX_FILE_SIZE`` are skipped
    without being marked as scanned.  For every other file the SHA-256 of its
    content is sent to :func:`get_report`.  The file is then marked as scanned,
    and if any engine in the report has ``detected`` set, the whole report is
    stored in ``blocked_reason``.  A missing report (``None``) is treated as
    "no match".

    A file that no longer exists in storage is marked as scanned and skipped.
    Any other error while hashing or fetching the report aborts the batch.

    Args:
        filters: iterable of SQLAlchemy filter criteria; ``None`` means no
            filtering at all.

    Returns:
        ``False`` if the query matched no files, ``True`` otherwise (also when
        the batch was aborted early).
    """
    # ``filter(*None)`` raises TypeError, so map the default to "no criteria".
    if filters is None:
        filters = ()

    storage_module = importlib.import_module(
        current_app.config.get('STORAGE_MODULE', 'fbin.file_storage.filesystem'))
    storage = storage_module.Storage(current_app)

    files = deque(db.session.query(File).filter(*filters).all())
    if not files:
        return False

    while files:
        dbfile = files.pop()
        if not dbfile.size:
            logger.info('Ignoring file %s/%s due to unknown size', dbfile.filename, dbfile.hash)
            continue
        if dbfile.size > MAX_FILE_SIZE:
            logger.info('Ignoring file %s/%s due to size (%s)',
                        dbfile.filename, dbfile.hash, dbfile.formatted_size)
            continue

        logger.info('Checking file %s/%s (%s)', dbfile.filename, dbfile.hash, dbfile.formatted_size)
        try:
            with storage.temp_file(dbfile) as f:
                digest = _sha256_hexdigest(f)
                # Rewind so that an upload in get_report sends the full content.
                f.seek(0)
                logger.info('SHA-256: %s', digest)
                report = get_report(dbfile, digest, f)
        except FileNotFoundError:
            logger.error('Skipping non-existent file %s/%s', dbfile.filename, dbfile.hash)
            dbfile.scanned = True
            db.session.add(dbfile)
            db.session.commit()
            continue
        except Exception:
            logger.exception('Failed to get report for %s/%s', dbfile.filename, dbfile.hash)
            # Most likely an error from virustotal, so just break here and retry later.
            break

        dbfile.scanned = True
        if report and any(r.get('detected', False) for r in report['scans'].values()):
            logger.warning('Positive match')
            dbfile.blocked_reason = report
        else:
            logger.info('No match')
        db.session.add(dbfile)
        db.session.commit()
        time.sleep(FILE_DELAY)

    logger.info('No more files to scan')
    return True


def poll_loop():
    """Run :func:`process_files` forever, pausing ``POLL_DELAY`` seconds between passes.

    The first pass considers every unscanned file.  Later passes only consider
    files dated at or after the timestamp taken just before the previous
    successful pass started, so files uploaded during a pass are still picked
    up by the next one.
    """
    now = datetime.datetime(1970, 1, 1)
    next_now = datetime.datetime.utcnow()
    while True:
        try:
            process_files([File.scanned == False, File.date >= now])  # noqa: E712
            # NOTE(review): process_files also returns normally when it aborts a
            # batch after a VirusTotal error, so the watermark advances and files
            # older than it are not retried until the scanner restarts - confirm
            # whether that is intended.
            now = next_now
            next_now = datetime.datetime.utcnow()
        except Exception:
            # On errors, log the exception and retry. Don't update 'now' so that we will retry with the same files.
            # This tends to happen eg. when we process a file but it's been deleted when we try to update it.
            logger.exception('Error while processing files')
            db.session.rollback()
        time.sleep(POLL_DELAY)


def main():
    """Run the poll loop when ``--poll`` was given, otherwise a single pass."""
    if args.poll:
        poll_loop()
    else:
        if not process_files([File.scanned == False]):  # noqa: E712
            logger.info('No files to scan were found')


app = Flask('scanner')
try:
    with app.app_context():
        app.config.from_pyfile(args.config_file)
        # Imported here, after argument parsing and locking, exactly as before.
        from fbin.db import db, File
        db.init_app(app)
        config = app.config
        main()
finally:
    # Always clean up, also when main() raises or the poll loop is interrupted.
    # The file is unlinked while the lock is still held so that no other
    # process can lock this path just before it gets deleted.
    try:
        os.unlink(args.lock_file)
    finally:
        fcntl.flock(lock_file, fcntl.LOCK_UN)
        lock_file.close()