summaryrefslogtreecommitdiff
path: root/fbin-scanner.py
blob: 2e60522d5fe29c805d8e83d90a90f667325902c8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#!/usr/bin/env python

import argparse
from collections import deque
import fcntl
import hashlib
import importlib
import logging
import os
import sys
import time

from flask import Flask, current_app
import requests

# Seconds to sleep after each processed file (passed to time.sleep in main()).
FILE_DELAY = 15
# Seconds to sleep between polls for a pending scan report
# (passed to time.sleep in get_report()).
SCAN_DELAY = 60

# Keyword arguments for logging.basicConfig(); '{' selects str.format-style
# placeholders in the format string below.
LOGGING_CONFIG = {
    'level': logging.INFO,
    'style': '{',
    'format': '{asctime} [{levelname}] {message}',
}

logging.basicConfig(**LOGGING_CONFIG)

logger = logging.getLogger('fbin-scanner')

# Command line: location of the lock file and of the config file that is
# later loaded into the Flask app's config.
parser = argparse.ArgumentParser()
parser.add_argument('--lock-file', default='/tmp/fbin-scanner.lock')
parser.add_argument('-c', '--config-file', default='fbin/fbin.cfg')
args = parser.parse_args()

# Single-instance guard: take an exclusive, non-blocking flock on the lock
# file. With LOCK_NB, flock raises OSError instead of waiting when the lock
# cannot be taken, in which case the script exits with status 1.
# The file object is kept open at module level; it is unlocked and closed at
# the end of the script.
lock_file = open(args.lock_file, 'w')
try:
    fcntl.flock(lock_file, fcntl.LOCK_EX | fcntl.LOCK_NB)
except OSError:
    logger.error('Cannot acquire lock; another process is running')
    sys.exit(1)

def get_report(dbfile, digest, fileobj, request_timeout=60):
    """Return the VirusTotal report for a file, submitting the file if needed.

    The report endpoint is first queried with ``digest``. Depending on the
    ``response_code`` of the answer:

    * ``1``  -- a report exists and is returned immediately.
    * ``0``  -- no report exists; ``fileobj`` is uploaded for scanning and the
      report is then polled for using the returned ``scan_id``.
    * ``-2`` -- the resource is already queued; the report is polled for.

    Polling sleeps ``SCAN_DELAY`` seconds between attempts and has no upper
    bound on the number of attempts.

    Args:
        dbfile: Object whose ``filename`` attribute is used as the upload name.
        digest: Resource identifier sent to the report endpoint (the caller
            passes a SHA-256 hex digest).
        fileobj: Readable file object uploaded when no report exists yet.
        request_timeout: Timeout passed to every ``requests`` call so a
            stalled connection raises instead of hanging forever. Accepts
            whatever ``requests`` accepts for ``timeout`` (seconds, or a
            ``(connect, read)`` tuple).

    Returns:
        The decoded JSON report (a dict) once ``response_code`` is 1, or
        ``None`` when the upload is rejected or the initial response code is
        not one of the handled values.

    Raises:
        requests.HTTPError: If any response has an error status
            (via ``raise_for_status``).
        requests.exceptions.Timeout: If a request exceeds ``request_timeout``.
    """
    report_url = 'https://www.virustotal.com/vtapi/v2/file/report'
    scan_url = 'https://www.virustotal.com/vtapi/v2/file/scan'

    logger.info('Fetching file report')
    api_key = app.config['VIRUSTOTAL_API_KEY']
    params = {
        'apikey': api_key,
        'resource': digest,
    }
    response = requests.get(report_url, params=params, timeout=request_timeout)
    response.raise_for_status()
    data = response.json()
    # Report found
    if data['response_code'] == 1:
        return data
    scan_id = None
    # No report, submit file for scan
    if data['response_code'] == 0:
        logger.info('No report, submitting file')
        response = requests.post(
            scan_url,
            params={'apikey': api_key},
            files={'file': (dbfile.filename, fileobj)},
            timeout=request_timeout,
        )
        response.raise_for_status()
        data = response.json()
        if data['response_code'] != 1:
            logger.error('Scan failed')
            return
        scan_id = data['scan_id']
        logger.info('File submitted with scan_id %s, waiting for scan report', scan_id)
        # From here on, poll for the report of this specific scan.
        params['resource'] = scan_id
    # File was submitted or is queued for scan
    if scan_id or data['response_code'] == -2:
        # TODO: consider adding a timeout here
        while True:
            time.sleep(SCAN_DELAY)
            response = requests.get(report_url, params=params, timeout=request_timeout)
            response.raise_for_status()
            data = response.json()
            if data['response_code'] == 1:
                return data
    logger.warning('Unknown response: %s', data)

def main():
    """Check every unscanned file against VirusTotal and store the outcome.

    Must run inside a Flask application context: the storage backend is
    looked up through ``current_app.config['STORAGE_MODULE']`` (defaulting to
    ``fbin.file_storage.filesystem``).

    For each ``File`` row with ``scanned == False``:

    * files with a falsy size or a size above 32 * 10**6 are skipped and left
      unscanned;
    * otherwise the content is hashed with SHA-256 and handed to
      ``get_report``; the row is marked as scanned, and the report is stored
      in ``blocked_reason`` if any scanner in it reports a detection.

    Any exception raised while fetching a report is logged and stops the run;
    the remaining files are left for a later invocation. ``FILE_DELAY``
    seconds are slept after each processed file.
    """
    max_size = 32*10**6
    chunk_size = 2**10*16

    storage_module = current_app.config.get('STORAGE_MODULE', 'fbin.file_storage.filesystem')
    storage = importlib.import_module(storage_module).Storage(current_app)
    with session_scope() as session:
        # `== False` is intentional: it builds the query filter expression.
        files = deque(session.query(File).filter(File.scanned == False).all())
        while files:
            dbfile = files.pop()
            size = dbfile.get_size()
            if not size:
                logger.info('Ignoring file %s/%s due to unknown size', dbfile.filename, dbfile.hash)
                continue
            if size > max_size:
                logger.info('Ignoring file %s/%s due to size (%s)', dbfile.filename, dbfile.hash, dbfile.formatted_size)
                continue
            logger.info('Checking file %s/%s (%s)', dbfile.filename, dbfile.hash, dbfile.formatted_size)
            try:
                with storage.temp_file(dbfile) as f:
                    h = hashlib.sha256()
                    chunk = f.read(chunk_size)
                    while chunk:
                        h.update(chunk)
                        chunk = f.read(chunk_size)
                    # Rewind so get_report can upload the file from the start.
                    f.seek(0)
                    digest = h.hexdigest()
                    logger.info('SHA-256: %s', digest)
                    report = get_report(dbfile, digest, f)
            except Exception:
                # Not a bare `except:` so KeyboardInterrupt/SystemExit are
                # not swallowed and can still stop the script.
                logger.exception('Failed to get report for %s/%s', dbfile.filename, dbfile.hash)
                # Most likely an error from virustotal, so just break here and retry later.
                break
            dbfile.scanned = True
            if report and any(r.get('detected', False) for r in report['scans'].values()):
                logger.warning('Positive match')
                dbfile.blocked_reason = report
            else:
                logger.info('No match')
            session.add(dbfile)
            session.commit()
            time.sleep(FILE_DELAY)
        else:
            # Only reached when the queue was drained, not after an abort.
            logger.info('No more files to scan')

# Build a minimal Flask app so that current_app and app.config are available
# to main() and get_report().
app = Flask('scanner')
with app.app_context():
    app.config.from_pyfile(args.config_file)
    # Imported here rather than at the top of the file; presumably fbin.db
    # needs an active app context / loaded config at import time -- TODO confirm.
    # The names become module globals and are used by main().
    from fbin.db import session_scope, File
    # NOTE(review): `config` is not referenced anywhere else in this file as
    # shown; verify whether anything depends on it before removing.
    config = app.config
    main()

# Release the single-instance lock taken at startup and remove the lock file.
# These lines only run if main() returned without raising.
fcntl.flock(lock_file, fcntl.LOCK_UN)
lock_file.close()
os.unlink(args.lock_file)