summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- fbin-scanner.py 37
1 files changed, 20 insertions, 17 deletions
diff --git a/fbin-scanner.py b/fbin-scanner.py
index 24bde76..8bd0d6b 100644
--- a/fbin-scanner.py
+++ b/fbin-scanner.py
@@ -4,12 +4,13 @@ import argparse
from collections import deque
import fcntl
import hashlib
+import importlib
import logging
import os
import sys
import time
-from flask import Flask
+from flask import Flask, current_app
import requests
FILE_DELAY = 15
@@ -37,7 +38,7 @@ except OSError:
logger.error('Cannot acquire lock; another process is running')
sys.exit(1)
-def get_report(dbfile, digest):
+def get_report(dbfile, digest, fileobj):
logger.info('Fetching file report')
params = {
'apikey': app.config['VIRUSTOTAL_API_KEY'],
@@ -55,7 +56,7 @@ def get_report(dbfile, digest):
logger.info('No report, submitting file')
response = requests.post('https://www.virustotal.com/vtapi/v2/file/scan',
params={'apikey': app.config['VIRUSTOTAL_API_KEY']},
- files={'file': (dbfile.filename, open(dbfile.get_path(), 'rb'))},
+ files={'file': (dbfile.filename, fileobj)},
)
response.raise_for_status()
data = response.json()
@@ -78,29 +79,31 @@ def get_report(dbfile, digest):
logger.warning('Unknown response: %s', data)
def main():
+ storage = importlib.import_module(current_app.config.get('STORAGE_MODULE')).Storage(current_app)
with session_scope() as session:
files = deque(session.query(File).filter(File.scanned == False).all())
while len(files):
dbfile = files.pop()
- if not dbfile.exists:
- logger.warning('Ignoring missing file %s', dbfile.get_path())
+ if not dbfile.get_size():
+ logger.info('Ignoring file %s/%s due to unknown size', dbfile.filename, dbfile.hash)
continue
if dbfile.get_size() > 32*10**6:
- logger.info('Ignoring file %s due to size (%s)', dbfile.get_path(), dbfile.formatted_size)
+ logger.info('Ignoring file %s/%s due to size (%s)', dbfile.filename, dbfile.hash, dbfile.formatted_size)
continue
- logger.info('Checking file %s (%s)', dbfile.get_path(), dbfile.formatted_size)
- h = hashlib.sha256()
- with open(dbfile.get_path(), 'rb') as f:
- chunk = f.read(2**10*16)
- while chunk:
- h.update(chunk)
- chunk = f.read(2**10*16)
- digest = h.hexdigest()
- logger.info('SHA-256: %s', digest)
+ logger.info('Checking file %s/%s (%s)', dbfile.filename, dbfile.hash, dbfile.formatted_size)
try:
- report = get_report(dbfile, digest)
+ with storage.temp_file(dbfile) as f:
+ h = hashlib.sha256()
+ chunk = f.read(2**10*16)
+ while chunk:
+ h.update(chunk)
+ chunk = f.read(2**10*16)
+ f.seek(0)
+ digest = h.hexdigest()
+ logger.info('SHA-256: %s', digest)
+ report = get_report(dbfile, digest, f)
except:
- logger.exception('Failed to get report for %s', dbfile.get_path())
+ logger.exception('Failed to get report for %s/%s', dbfile.filename, dbfile.hash)
# Most likely an error from virustotal, so just break here and retry later.
break
dbfile.scanned = True