From ac0ece82fe81b4dfcd0c1e722f520337d5c4ea5f Mon Sep 17 00:00:00 2001 From: Jon Bergli Heier Date: Wed, 24 Jul 2019 09:36:18 +0200 Subject: fbin-scanner: Add support for file storage modules --- fbin-scanner.py | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/fbin-scanner.py b/fbin-scanner.py index 24bde76..8bd0d6b 100644 --- a/fbin-scanner.py +++ b/fbin-scanner.py @@ -4,12 +4,13 @@ import argparse from collections import deque import fcntl import hashlib +import importlib import logging import os import sys import time -from flask import Flask +from flask import Flask, current_app import requests FILE_DELAY = 15 @@ -37,7 +38,7 @@ except OSError: logger.error('Cannot acquire lock; another process is running') sys.exit(1) -def get_report(dbfile, digest): +def get_report(dbfile, digest, fileobj): logger.info('Fetching file report') params = { 'apikey': app.config['VIRUSTOTAL_API_KEY'], @@ -55,7 +56,7 @@ def get_report(dbfile, digest): logger.info('No report, submitting file') response = requests.post('https://www.virustotal.com/vtapi/v2/file/scan', params={'apikey': app.config['VIRUSTOTAL_API_KEY']}, - files={'file': (dbfile.filename, open(dbfile.get_path(), 'rb'))}, + files={'file': (dbfile.filename, fileobj)}, ) response.raise_for_status() data = response.json() @@ -78,29 +79,31 @@ def get_report(dbfile, digest): logger.warning('Unknown response: %s', data) def main(): + storage = importlib.import_module(current_app.config.get('STORAGE_MODULE')).Storage(current_app) with session_scope() as session: files = deque(session.query(File).filter(File.scanned == False).all()) while len(files): dbfile = files.pop() - if not dbfile.exists: - logger.warning('Ignoring missing file %s', dbfile.get_path()) + if not dbfile.get_size(): + logger.info('Ignoring file %s/%s due to unknown size', dbfile.filename, dbfile.hash) continue if dbfile.get_size() > 32*10**6: - logger.info('Ignoring file %s due to size (%s)', dbfile.get_path(), dbfile.formatted_size) + logger.info('Ignoring file %s/%s due to size (%s)', dbfile.filename, dbfile.hash, dbfile.formatted_size) continue - logger.info('Checking file %s (%s)', dbfile.get_path(), dbfile.formatted_size) - h = hashlib.sha256() - with open(dbfile.get_path(), 'rb') as f: - chunk = f.read(2**10*16) - while chunk: - h.update(chunk) - chunk = f.read(2**10*16) - digest = h.hexdigest() - logger.info('SHA-256: %s', digest) + logger.info('Checking file %s/%s (%s)', dbfile.filename, dbfile.hash, dbfile.formatted_size) try: - report = get_report(dbfile, digest) + with storage.temp_file(dbfile) as f: + h = hashlib.sha256() + chunk = f.read(2**10*16) + while chunk: + h.update(chunk) + chunk = f.read(2**10*16) + f.seek(0) + digest = h.hexdigest() + logger.info('SHA-256: %s', digest) + report = get_report(dbfile, digest, f) except: - logger.exception('Failed to get report for %s', dbfile.get_path()) + logger.exception('Failed to get report for %s/%s', dbfile.filename, dbfile.hash) # Most likely an error from virustotal, so just break here and retry later. break dbfile.scanned = True -- cgit v1.2.3