summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Nordberg <linus@nordberg.se>2021-06-21 16:34:11 +0200
committerLinus Nordberg <linus@nordberg.se>2021-06-21 16:34:11 +0200
commit475a215ef62140d5656ba794cc29e67226587bbc (patch)
tree450b5c0f7d0f7df1f49fda1b5d29f8034cb4c0fb
initial commit
-rw-r--r--src/README.md62
-rwxr-xr-xsrc/db.py171
-rwxr-xr-xsrc/store.py62
-rwxr-xr-xsrc/wsgi.py120
4 files changed, 415 insertions, 0 deletions
diff --git a/src/README.md b/src/README.md
new file mode 100644
index 0000000..dc5b771
--- /dev/null
+++ b/src/README.md
@@ -0,0 +1,62 @@
+# soc_collector -- Gathering vulnerability information and presenting it
+
+## The oneliner
+
+The collector answers the fundamental question constantly posed by all
+SOC personnel ever: Can I go for lunch now?
+
+## The elevator pitch
+
+You're working as a Security Operations Center engineer and your job
+is to, one, know when any part of your infrastructure is vulnerable
+and, two, if it is, do something smart about it.
+
+The collector compiles data from vulnerability scanners and stores it
+in a database. You query the collector for the current vulnerability
+status of your network infrastructure.
+
+Without a summary of your vulnerability status and ability to quickly
+deepen your knowledge of a given system, your chances of ever eating
+lunch with a clear conscience are slim.
+
+## The user interface
+
+TODO
+
+## The gory^Wtechnical details
+
+TODO
+
+## The name
+
+The "soc" part means Security Operations Center.
+
+The "collector" part is correct but misleading since `soc_collector`
+also processes and presents.
+
+## The license
+
+This code is licensed under the 2-Clause BSD License, see LICENSE for
+the full text.
+
+## How to test it out
+
+The collector has been tested on Debian 10 (Buster). It should be
+possible to host it on other Unix systems as well.
+
+On a Debian system, first install the dependencies.
+
+ sudo apt install python3 python3-pip python3-leveldb
+ pip3 install falcon
+
+Start the demo HTTP server.
+
+ python3 src/wsgi.py
+
+Submit some data.
+
+ echo '[{"a":"räksmörgås","domain":"foo.se"},{"bar":"Baz"}]' | curl -s -u admin:admin --data-binary @- http://localhost:8000/sc/v0/add
+
+Get the same data back.
+
+ curl -s -u foo.se: http://localhost:8000/sc/v0/get | json_pp -json_opt utf8,pretty
diff --git a/src/db.py b/src/db.py
new file mode 100755
index 0000000..4702def
--- /dev/null
+++ b/src/db.py
@@ -0,0 +1,171 @@
+#! /usr/bin/env python3
+
+# A database storing dictionaries, keyed on a timestamp.
+
+# key = 8 octets timestamp | 1 octet version
+# struct.pack('!dB', time.time(), 0)
+
+# value = A dict which will be stored as a JSON object encoded in
+# UTF-8. Note that dict keys of type integer or float will become
+# strings while values will keep their type.
+
+# Note that there's a (slim) chance that you'd stomp on the previous
+# value if you're too quick with generating the timestamps, ie
+# invoking time.time() several times quickly enough.
+
+from store import KVStore
+import json
+from pathlib import PurePath
+import time
+import struct
+import os
+
class DictDB(KVStore):
    """A store of dicts, keyed on a packed (timestamp, version) byte
    string and serialized as UTF-8 encoded JSON.
    """
    VERSION = 0

    # TODO: implement indexes
    # TODO: implement search(dict key) for indexed fields

    def __init__(self, name, basedir='.'):
        super().__init__(name, basedir)
        self._ts = time.time()  # Last timestamp handed out by unique_key().
        self._index = {}        # Maps index name -> DBIndex.

    def unique_key(self):
        """Return a new unique 9-octet key: 8 octets of timestamp
        followed by 1 octet of format version.

        Spins until time.time() differs from the previous call, which
        guarantees uniqueness within this process.
        """
        ts = time.time()
        while ts == self._ts:
            ts = time.time()
        self._ts = ts
        return struct.pack('!dB', ts, DictDB.VERSION)

    def index_add(self, path):
        # NOTE(review): DBIndex(name, basedir='.') receives the full
        # path as its name and keeps the default basedir -- confirm
        # this is intended before relying on indexes.
        name = PurePath(path).name
        self._index[name] = DBIndex(path)

    def add(self, data, batch_write=False):
        """Store DATA -- a dict, or a list of dicts -- and return its
        key (or the list of keys).

        With batch_write=True a list is committed atomically through a
        LevelDB write batch.
        """
        if not isinstance(data, list):
            key = self.unique_key()
            self.put(key, json.dumps(data).encode('UTF-8'))
            return key
        if not batch_write:
            return [self.add(e) for e in data]
        # BUGFIX: the puts must go into the batch itself.  Previously
        # each element was written immediately with self.put() and an
        # *empty* batch was committed, so the update was not atomic.
        batch = self.batch()
        keys = []
        for e in data:
            key = self.unique_key()
            batch.Put(key, json.dumps(e).encode('UTF-8'))
            keys.append(key)
        self.write(batch)
        return keys

    def get(self, key):
        """Return the dict stored under KEY."""
        # Name collision with KVStore.get -- call the superclass
        # explicitly for the raw value.
        enc = super().get(key)
        return json.loads(enc.decode('UTF-8'))

    def slice(self, key_from=None, key_to=None):
        """Return a list of (timestamp, dict) tuples for all entries
        with keys in [key_from, key_to], in key order.  Entries with a
        version other than DictDB.VERSION are skipped.
        """
        ret = []
        for key_raw, val_raw in self.range_iter(key_from=key_from, key_to=key_to):
            (key, ver) = struct.unpack('!dB', key_raw)
            if ver == DictDB.VERSION:
                ret.append((key, json.loads(val_raw.decode('UTF-8'))))
        return ret

    def search(self, dict_name, dict_val=None):
        """Search the top level of all stored dicts for a key matching
        DICT_NAME, optionally requiring its value to equal DICT_VAL.
        Return a list of (timestamp, dict) tuples.
        """
        res = []
        for key, obj in self.slice():
            if dict_name in obj:
                if dict_val is None or dict_val == obj[dict_name]:
                    res.append((key, obj))
        return res

    @staticmethod
    def timestamp_from_key(key):
        """Extract the float timestamp from a packed KEY.

        BUGFIX: declared @staticmethod -- the original had no self
        parameter, so calling it on an instance would have misbound
        the instance to KEY.
        """
        ts, _ = struct.unpack('!dB', key)
        return ts
+
class DBIndex(KVStore):
    """Placeholder for a secondary index over a DictDB field.

    Currently just a bare KVStore; the maintenance logic is only
    sketched in the commented-out code below and is not implemented.
    """
    def __init__(self, name, basedir='.'):
        super().__init__(name, basedir)

# NOTE(review): unimplemented sketch, kept for reference.
# def update_or_create(self, key, val):
# curval = []
# try:
# curval = self._db.Get(key)
# except KeyError:
# self._db.Put(key, [val])
# return

# if curval is list:
# self._db.Put(key, curval + [val])
# else:
# self._db.Put(key, json.dumps([curval, val]))

    # def index_full(self, name):
    # kv = self._index[name])
    # for key, val in self.range_iter():
    # valdict = json.loads(val)
    # field = valdict.get(name)
    # if field:
    # ix = kv.get(key)
    # if ix:
    # kv.put(ix + [key])
    # else:
    # kv.put([key])
+
from operator import itemgetter

def dict_eq(a, b):
    """Return True iff dicts A and B hold the same key/value pairs.

    Compares sorted item lists, making the check independent of
    insertion order.
    """
    sort_on_key = lambda d: sorted(d.items(), key=itemgetter(0))
    return sort_on_key(a) == sort_on_key(b)

if __name__ == '__main__':
    # BUGFIX: dict_eq (and its import) used to be defined *below* this
    # block, so the self-tests crashed with NameError at the first
    # dict_eq() call.  They are now defined above the guard.
    DBDIR = 'test_db.db'
    # TODO: rm -r DBDIR
    db = DictDB('db', basedir = DBDIR)
    #ix = DBIndex('foo', basedir = DBDIR)

    key = db.add({'foo': 'Bar'})
    assert(db.get(key) == {'foo': 'Bar'})

    key = db.add({0: 'Foo'})  # NOTE: int keys become strings
    assert(db.get(key) == {'0': 'Foo'})

    d = {'4711': 'Large number', '7': 'Small number', '0': 'Bar'}
    key = db.add(d)
    res = db.get(key)
    assert(dict_eq(d, res))

    key = db.add({'an int': 0})
    assert(db.get(key) == {'an int': 0})

    key = db.add({'a float': 1.1})
    assert(db.get(key) == {'a float': 1.1})

    # TODO: verify slice() too
    for key, val in db.slice():
        print(key, val)

    res = db.search('an int')
    assert(dict_eq(res[0][1], {'an int': 0}))

    res = db.search('0')
    assert(dict_eq(res[0][1], {'0': 'Foo'}))
    assert(dict_eq(res[1][1], d))

    res = db.search('7', dict_val = 'Small number')  # FIXME: verify better -- do we hit only '7' here, f.ex.?
    assert('7' in res[0][1])

    res = db.search('7', dict_val = 'not matching')
    assert(not res)

    N = 10 * 1000  # 10k takes ~0.2s.
    data = [{str(x): x} for x in range(N)]
    keys = db.add(data, batch_write = False)
    assert(len(keys) == N)
    for k in range(len(keys)):
        assert(db.get(keys[k]) == data[k])
diff --git a/src/store.py b/src/store.py
new file mode 100755
index 0000000..bc2c4ff
--- /dev/null
+++ b/src/store.py
@@ -0,0 +1,62 @@
+#! /usr/bin/env python3
+
+from leveldb import LevelDB, WriteBatch
+import time
+from pathlib import PurePath
+import os
+
class KVStore:
    """Wraps a Python wrapper for LevelDB in case we want to change
    wrapper libraries later without touching callers.
    """
    def __init__(self, name, basedir='.'):
        """Open (creating as needed) the database NAME under BASEDIR."""
        os.makedirs(basedir, exist_ok = True)
        path = str(PurePath(basedir).joinpath(name))
        self._db = LevelDB(path)

    def get(self, key):
        """Return the value stored under KEY, or None if absent.

        NOTE: since None signals a missing key, None itself cannot be
        stored as a value.
        """
        try:
            val = self._db.Get(key)
        except KeyError:
            val = None
        return val

    def put(self, key, val):
        """Store VAL (bytes) under KEY (bytes)."""
        self._db.Put(key, val)

    def delete(self, key):
        """Remove KEY from the store."""
        self._db.Delete(key)

    def range_iter(self, key_from=None, key_to=None):
        """Iterate (key, value) pairs with key_from <= key <= key_to,
        in key order."""
        return self._db.RangeIter(key_from=key_from, key_to=key_to)

    def batch(self):
        """Return a new write batch, to be committed with write()."""
        return WriteBatch()

    def write(self, batch):
        """Commit BATCH atomically, synced to disk."""
        self._db.Write(batch, sync=True)

    @staticmethod
    def timestamp_asc():
        """Return the current time as an ASCII byte string, usable as
        a roughly time-ordered key.

        BUGFIX: declared @staticmethod -- the original took no self
        parameter, so calling it on an *instance* would have raised
        TypeError; class-level calls keep working as before.
        """
        return str(time.time()).encode('ascii')
+
+
def test_store(name):
    """Exercise the basic KVStore operations on a store named NAME
    and return the store."""
    # TODO: rm -r name
    store = KVStore(name)

    first_key = KVStore.timestamp_asc()
    store.put(first_key, b'Bar')
    assert(store.get(first_key) == b'Bar')

    second_key = KVStore.timestamp_asc()
    store.put(second_key, b'Foo')
    assert(store.get(second_key) == b'Foo')

    # Keys are timestamps, so iteration order matches insertion order.
    expected = [(first_key, b'Bar'), (second_key, b'Foo')]
    assert(list(store.range_iter()) == expected)

    store.delete(first_key)
    assert(store.get(first_key) is None)

    return store

if __name__ == '__main__':
    test_store('test_store.db')
diff --git a/src/wsgi.py b/src/wsgi.py
new file mode 100755
index 0000000..1eda9de
--- /dev/null
+++ b/src/wsgi.py
@@ -0,0 +1,120 @@
+#! /usr/bin/env python3
+
+import sys
+from wsgiref.simple_server import make_server
+import falcon
+import json
+from db import DictDB
+import time
+from base64 import b64decode
+
class CollectorResource():
    """Base class for the falcon resources: holds the shared database
    handle and provides HTTP basic-auth helpers."""
    def __init__(self, db):
        self._db = db

    @staticmethod
    def parse_error(data):
        """Return an error message describing unparsable input DATA."""
        return "I want valid JSON but got this:\n{}\n".format(data)

    def user_authn(self, auth_header, authfun):
        """Authenticate an HTTP Basic auth header.

        AUTH_HEADER is the Authorization header value (or None).
        AUTHFUN is called with (username, password_bytes) and returns
        truth on success.  Return the user name on success, None on
        any failure.
        """
        if not auth_header:
            return None  # Fail.
        # BUGFIX: a header without exactly one separating blank used
        # to raise an uncaught ValueError (-> HTTP 500).
        try:
            scheme, b64 = auth_header.split(None, 1)
        except ValueError:
            return None  # Fail.
        if scheme != "Basic":
            return None  # Fail.
        # BUGFIX: split on the *first* colon only -- RFC 7617 allows
        # colons in the password.  b64decode (binascii.Error), the
        # unpacking, and .decode all raise ValueError subclasses on
        # bad input.
        try:
            userbytes, pwbytes = b64decode(b64).split(b':', 1)
            user = userbytes.decode('ascii')
        except ValueError:
            return None  # Fail.
        if authfun(user, pwbytes):
            return user  # Success.
        return None  # Fail.
+
+
class EPGet(CollectorResource):
    """GET endpoint returning, as JSON, every stored observation whose
    'domain' field equals the authenticated user name."""
    def on_get(self, req, resp):
        resp.status = falcon.HTTP_200
        resp.content_type = falcon.MEDIA_JSON

        # Any user name with any password is accepted; the user name
        # only selects which records are returned.
        userid = self.user_authn(req.auth, lambda user, _pw: user is not None)
        if not userid:
            resp.status = falcon.HTTP_401
            resp.text = 'Invalid user or password\n'
            return

        # BUGFIX(style): the loop variable used to shadow the builtin
        # 'dict'; the redundant pre-initialization of the output list
        # is gone too.
        out = [{time.ctime(key): obj}
               for (key, obj) in self._db.search('domain', dict_val=userid)]
        resp.text = json.dumps(out) + '\n'
+
+
class EPAdd(CollectorResource):
    """POST endpoint accepting a JSON object, or list of objects, and
    storing each one in the database.  Responds with the repr of the
    stored key(s), one per line."""
    def on_post(self, req, resp):
        resp.status = falcon.HTTP_200
        resp.content_type = falcon.MEDIA_TEXT

        # Only the hard-coded admin user may submit data.
        if self.user_authn(req.auth,
                           lambda u, p: u == 'admin' and p == b'admin') is None:
            resp.status = falcon.HTTP_401
            resp.text = 'Invalid user or password\n'
            return

        # TODO: can we do json.load(req.bounded_stream,
        # cls=customDecoder) where our decoder calls JSONDecoder after
        # decoding UTF-8?

        # NOTE: Reading the whole body in one go instead of streaming
        # it nicely.
        rawin = req.bounded_stream.read()
        try:
            decodedin = rawin.decode('UTF-8')
        except UnicodeDecodeError:  # BUGFIX: was a bare except.
            resp.status = falcon.HTTP_400
            resp.text = 'Need UTF-8\n'
            return

        # NOTE(review): the object_hook stores each decoded object as
        # a side effect, so a JSON error halfway through may leave
        # earlier objects already written.
        try:
            keys = json.loads(decodedin, object_hook=self.handle_data)
        except (TypeError, json.decoder.JSONDecodeError):
            resp.status = falcon.HTTP_400
            resp.text = CollectorResource.parse_error(decodedin)
            return

        resp.text = ''.join(repr(key) + '\n' for key in keys)

    def handle_data(self, data):
        """json.loads object hook: store DATA, return its key."""
        return self._db.add(data)
+
+
def init(url_res_map, addr = '', port = 8000):
    """Build a falcon app with the (url, resource) routes given in
    URL_RES_MAP and return a WSGI server bound to (ADDR, PORT)."""
    app = falcon.App()
    for route, resource in url_res_map:
        app.add_route(route, resource)
    return make_server(addr, port, app)
+
+
def main():
    """Simple demo.

    Try adding some observations, basic auth admin:admin, and include
    {"domain": "foo.se"} in some of them.  Then try retrieving all
    observations for user 'foo.se' (basic auth foo.se:whatever).
    """
    db = DictDB('wsgi_demo.db')
    routes = [('/sc/v0/add', EPAdd(db)),
              ('/sc/v0/get', EPGet(db))]
    httpd = init(routes)
    print('Serving on port 8000...')
    httpd.serve_forever()

if __name__ == '__main__':
    sys.exit(main())