summaryrefslogtreecommitdiff
path: root/fbin-scanner.py
blob: 76bc33b2cb8a65c013aa1217e6849373e2e12ad7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#!/usr/bin/env python

import argparse
from collections import deque
import fcntl
import hashlib
import importlib
import logging
import os
import sys
import time

from flask import Flask, current_app
import requests

# Seconds to sleep after each processed file (see main()).
FILE_DELAY = 15
# Seconds to sleep between polls for a pending scan report (see get_report()).
SCAN_DELAY = 60

# Keyword arguments passed straight to logging.basicConfig(); the format
# string uses '{'-style placeholders, hence the explicit 'style' entry.
LOGGING_CONFIG = dict(
    level=logging.INFO,
    style='{',
    format='{asctime} [{levelname}] {message}',
)

logging.basicConfig(**LOGGING_CONFIG)

# Logger shared by every function in this script.
logger = logging.getLogger('fbin-scanner')

# Command-line interface: both options are optional and fall back to defaults.
parser = argparse.ArgumentParser()
parser.add_argument(
    '--lock-file',
    default='/tmp/fbin-scanner.lock',
)
parser.add_argument(
    '-c', '--config-file',
    default='fbin/fbin.cfg',
)
args = parser.parse_args()

# Take an exclusive, non-blocking lock on the lock file so that only one
# scanner runs at a time. The file object stays open at module level because
# the lock is released again at the very end of the script.
lock_file = open(args.lock_file, mode='w')
try:
    fcntl.flock(lock_file, fcntl.LOCK_EX | fcntl.LOCK_NB)
except OSError:
    # With LOCK_NB, flock() raises instead of waiting when the lock is held.
    logger.error('Cannot acquire lock; another process is running')
    raise SystemExit(1)

def get_report(dbfile, digest, fileobj, request_timeout=60, max_wait=3600):
    """Fetch the VirusTotal report for a file, submitting the file if needed.

    The report is first looked up by ``digest``. If VirusTotal has no report
    (response code 0), the file is uploaded for scanning and the report is
    then polled for every ``SCAN_DELAY`` seconds. If the file is already
    queued (response code -2), the report is polled for directly.

    Args:
        dbfile: File record; only ``dbfile.filename`` is used, as the name of
            the uploaded file.
        digest: Hash of the file contents, used as the lookup resource.
        fileobj: Open file object positioned at the start of the contents;
            it is only read when the file has to be uploaded.
        request_timeout: Timeout in seconds passed to every HTTP request so
            that a stalled connection cannot hang the scanner forever.
        max_wait: Maximum number of seconds to keep polling for a pending
            report. ``None`` polls without limit.

    Returns:
        The decoded report (a dict with ``response_code`` 1), or ``None`` when
        the upload was rejected or the response code was not recognised.

    Raises:
        TimeoutError: If no report became available within ``max_wait``.
        Exception: Anything raised by ``requests``, including the errors
            ``raise_for_status()`` raises for HTTP error statuses.
    """
    report_url = 'https://www.virustotal.com/vtapi/v2/file/report'
    scan_url = 'https://www.virustotal.com/vtapi/v2/file/scan'
    api_key = app.config['VIRUSTOTAL_API_KEY']

    logger.info('Fetching file report')
    params = {
        'apikey': api_key,
        'resource': digest,
    }
    response = requests.get(report_url, params=params, timeout=request_timeout)
    response.raise_for_status()
    data = response.json()
    # Report found
    if data['response_code'] == 1:
        return data
    scan_id = None
    # No report, submit file for scan
    if data['response_code'] == 0:
        logger.info('No report, submitting file')
        response = requests.post(
            scan_url,
            params={'apikey': api_key},
            files={'file': (dbfile.filename, fileobj)},
            timeout=request_timeout,
        )
        response.raise_for_status()
        data = response.json()
        if data['response_code'] != 1:
            logger.error('Scan failed')
            return None
        scan_id = data['scan_id']
        logger.info('File submitted with scan_id %s, waiting for scan report', scan_id)
        # From here on, poll for the report of this particular scan.
        params['resource'] = scan_id
    # File was submitted or is queued for scan
    if scan_id or data['response_code'] == -2:
        deadline = None if max_wait is None else time.monotonic() + max_wait
        while deadline is None or time.monotonic() < deadline:
            # Sleep before polling: the report cannot be ready immediately.
            time.sleep(SCAN_DELAY)
            response = requests.get(report_url, params=params, timeout=request_timeout)
            response.raise_for_status()
            data = response.json()
            if data['response_code'] == 1:
                return data
        # Give up instead of blocking forever; the caller can retry later.
        raise TimeoutError(
            'No scan report for %s within %s seconds' % (params['resource'], max_wait)
        )
    logger.warning('Unknown response: %s', data)
    return None

def main():
    """Scan all files not yet marked as scanned and record the results.

    Must run inside a Flask application context: the storage backend is
    built from ``current_app`` and the database session is used directly.

    For each unscanned file whose size is known and at most 32 * 10**6
    bytes, the SHA-256 of its contents is computed and ``get_report()`` is
    asked for a report. The file is then marked as scanned; if any scanner in
    the report flagged it, the whole report is stored in ``blocked_reason``.
    Files with unknown or too large size are skipped and stay unscanned.

    If fetching a report raises, processing stops so that the remaining files
    are retried on the next run.

    NOTE(review): when ``get_report()`` returns ``None`` (upload rejected or
    unknown response) the file is still marked as scanned with no match --
    confirm that this is intended.
    """
    max_size = 32*10**6
    chunk_size = 2**10*16

    storage_module = current_app.config.get('STORAGE_MODULE', 'fbin.file_storage.filesystem')
    storage = importlib.import_module(storage_module).Storage(current_app)
    # '== False' is deliberate: presumably a SQLAlchemy column comparison that
    # builds a query expression, so 'not File.scanned' would not work here.
    files = deque(db.session.query(File).filter(File.scanned == False).all())  # noqa: E712
    while files:
        dbfile = files.pop()
        size = dbfile.get_size()
        if not size:
            logger.info('Ignoring file %s/%s due to unknown size', dbfile.filename, dbfile.hash)
            continue
        if size > max_size:
            logger.info('Ignoring file %s/%s due to size (%s)', dbfile.filename, dbfile.hash, dbfile.formatted_size)
            continue
        logger.info('Checking file %s/%s (%s)', dbfile.filename, dbfile.hash, dbfile.formatted_size)
        try:
            with storage.temp_file(dbfile) as f:
                h = hashlib.sha256()
                chunk = f.read(chunk_size)
                while chunk:
                    h.update(chunk)
                    chunk = f.read(chunk_size)
                # Rewind so get_report() can upload the file from the start.
                f.seek(0)
                digest = h.hexdigest()
                logger.info('SHA-256: %s', digest)
                report = get_report(dbfile, digest, f)
        except Exception:
            # Deliberately not a bare 'except': KeyboardInterrupt and
            # SystemExit must still be able to stop the scanner.
            logger.exception('Failed to get report for %s/%s', dbfile.filename, dbfile.hash)
            # Most likely an error from virustotal, so just break here and retry later.
            break
        dbfile.scanned = True
        if report and any(r.get('detected', False) for r in report['scans'].values()):
            logger.warning('Positive match')
            dbfile.blocked_reason = report
        else:
            logger.info('No match')
        db.session.add(dbfile)
        db.session.commit()
        time.sleep(FILE_DELAY)
    else:
        # Only reached when the queue was exhausted, not after an error break.
        logger.info('No more files to scan')

# Script entry point: build a minimal Flask app, load the configuration and
# run the scanner inside the application context.
try:
    app = Flask('scanner')
    with app.app_context():
        app.config.from_pyfile(args.config_file)
        # Imported here rather than at the top of the file; main() uses
        # 'db' and 'File' as module-level names.
        from fbin.db import db, File
        db.init_app(app)
        config = app.config
        main()
finally:
    # Always release and remove the lock file, even when the scan fails, so a
    # crash does not skip the cleanup.
    fcntl.flock(lock_file, fcntl.LOCK_UN)
    lock_file.close()
    os.unlink(args.lock_file)