diff options
author | Victor Näslund <victor@sunet.se> | 2022-11-02 15:31:23 +0100 |
---|---|---|
committer | Victor Näslund <victor@sunet.se> | 2022-11-02 15:31:23 +0100 |
commit | 8baecf339e8061160bee519e87ffe837d1525c18 (patch) | |
tree | 22664c10f22382b1d4647b5f2e96bcea4220d879 /src/collector/schema.py | |
parent | ffb26f4a81a9ca61c4105df037f7e1beb8dc5fb0 (diff) |
more freshup
Diffstat (limited to 'src/collector/schema.py')
-rw-r--r-- | src/collector/schema.py | 136 |
1 files changed, 136 insertions, 0 deletions
"""JSON schema for collector documents, plus helpers derived from it.

Module path (from the commit this was taken from): src/collector/schema.py

The module exposes:

* ``schema`` -- the JSON Schema that ``validate_collector_data`` checks
  documents against.
* ``get_index_keys`` / ``as_index_list`` -- helpers that derive index names
  and index definitions from the schema's top-level properties.
* ``validate_collector_data`` -- validate one already-parsed document.

Run as a script with a path to a JSON file to validate that file and print
the result.
"""

from typing import List, Any, Dict
import json
import sys
import traceback

import jsonschema

# fmt:off
# NOTE: Commented out properties are left intentionally, so it is easier to see
# what properties are optional.
schema: Dict[str, Any] = {
    "$schema": "http://json-schema.org/schema#",
    "type": "object",
    "properties": {
        "document_version": {"type": "integer"},
        "ip": {"type": "string"},
        "port": {"type": "integer"},
        "whois_description": {"type": "string"},
        "asn": {"type": "string"},
        "asn_country_code": {"type": "string"},
        "ptr": {"type": "string"},
        "abuse_mail": {"type": "string"},
        "domain": {"type": "string"},
        # NOTE(review): "format" is only enforced because validation passes a
        # FormatChecker; jsonschema additionally needs an RFC 3339 validator
        # package installed for "date-time" to be checked -- confirm it is a
        # declared dependency.
        "timestamp": {"type": "string", "format": "date-time"},
        "display_name": {"type": "string"},
        "description": {"type": "string"},
        # Free-form entries: every key maps to an object with a display name
        # and a scalar value.
        "custom_data": {
            "type": "object",
            "patternProperties": {
                ".*": {
                    "type": "object",
                    "properties": {
                        "display_name": {"type": "string"},
                        "data": {"type": ["string", "boolean", "integer"]},
                        "description": {"type": "string"},
                    },
                    "required": [
                        "display_name",
                        "data",
                        # "description"
                    ]
                },
            },
        },
        # Every key maps to an object that must carry "display_name" and
        # exactly one of "vulnerable" / "investigation_needed".
        # NOTE(review): because this is "oneOf", an entry that contains BOTH
        # "vulnerable" and "investigation_needed" matches both branches and is
        # rejected -- confirm that this is intended (vs. "anyOf").
        "result": {
            "type": "object",
            "patternProperties": {
                ".*": {
                    "type": "object",
                    "properties": {
                        "display_name": {"type": "string"},
                        "vulnerable": {"type": "boolean"},
                        "investigation_needed": {"type": "boolean"},
                        "reliability": {"type": "integer"},
                        "description": {"type": "string"},
                    },
                    "oneOf": [
                        {
                            "required": [
                                "display_name",
                                "vulnerable",
                                # "reliability",  # TODO: reliability is required if vulnerable = true
                                # "description",
                            ]
                        },
                        {
                            "required": [
                                "display_name",
                                "investigation_needed",
                                # "reliability",  # TODO: reliability is required if investigation_needed = true
                                # "description",
                            ]
                        },
                    ]
                },
            },
        },
    },
    "required": [
        "document_version",
        "ip",
        "port",
        "whois_description",
        "asn",
        "asn_country_code",
        "ptr",
        "abuse_mail",
        "domain",
        "timestamp",
        "display_name",
        # "description",
        # "custom_data",
        "result",
    ],
}
# fmt:on


def get_index_keys() -> List[str]:
    """Return the names of the schema's top-level properties.

    The order follows the declaration order in ``schema["properties"]``.
    A new list is returned on every call, so callers may mutate it freely.
    """
    return list(schema["properties"])


def as_index_list() -> List[Dict[str, Any]]:
    """Return one JSON index definition per top-level schema property.

    Each definition indexes a single field and is named
    ``"<property>-json-index"``.  The list order follows the declaration
    order in ``schema["properties"]``.

    NOTE(review): the dict layout (``index.fields`` / ``name`` /
    ``type: "json"``) presumably targets the database's index-creation API --
    verify against the caller.
    """
    return [
        {
            "index": {"fields": [key]},
            "name": f"{key}-json-index",
            "type": "json",
        }
        for key in schema["properties"]
    ]


def validate_collector_data(json_blob: Dict[str, Any]) -> str:
    """Validate an already-parsed document against ``schema``.

    Args:
        json_blob: The decoded JSON document to check.

    Returns:
        An empty string when the document is valid, otherwise a message of
        the form ``"Validation failed with error: <reason>"`` describing the
        error reported by jsonschema.

    Only ``ValidationError`` is translated into a message; any other
    exception raised by jsonschema propagates to the caller.
    """
    try:
        # A FormatChecker is passed so that "format" keywords (e.g. the
        # "date-time" on "timestamp") are asserted, not just annotated.
        jsonschema.validate(json_blob, schema, format_checker=jsonschema.FormatChecker())
    except jsonschema.exceptions.ValidationError as e:
        return f"Validation failed with error: {e.message}"
    return ""


if __name__ == "__main__":
    # Fail with a readable usage message instead of a bare IndexError.
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} <document.json>")

    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open(sys.argv[1], encoding="utf-8") as fd:
        json_data = json.load(fd)

    # Prints an empty line when the document is valid.
    print(validate_collector_data(json_data))