From 15d65c756fe89aca6cbcc754dc648853ca334095 Mon Sep 17 00:00:00 2001 From: Magnus Ahltorp Date: Wed, 9 Mar 2016 06:58:08 +0100 Subject: Use python requests package instead of urllib2 --- tools/certtools.py | 194 ++++++++++++++++++++++++---------------------------- tools/merge_sth.py | 5 +- tools/mergetools.py | 97 ++++++++------------------ 3 files changed, 121 insertions(+), 175 deletions(-) diff --git a/tools/certtools.py b/tools/certtools.py index 6cb4f55..919460e 100644 --- a/tools/certtools.py +++ b/tools/certtools.py @@ -4,7 +4,6 @@ import subprocess import json import base64 -import urllib import urllib2 import ssl import urlparse @@ -16,6 +15,9 @@ import datetime import cStringIO import zipfile import shutil +import requests +import warnings + from certkeys import publickeys def get_cert_info(s): @@ -90,76 +92,48 @@ def get_root_cert(issuer): return root_cert class sslparameters: - sslcontext = None + cafile = None def create_ssl_context(cafile=None): try: - sslparameters.sslcontext = ssl.create_default_context(cafile=cafile) + sslparameters.cafile = cafile except AttributeError: - sslparameters.sslcontext = None + sslparameters.cafile = None -def get_opener(): - try: - opener = urllib2.build_opener(urllib2.HTTPSHandler(context=sslparameters.sslcontext)) - except TypeError: - opener = urllib2.build_opener(urllib2.HTTPSHandler()) - return opener - -def urlopen(url, data=None): - return get_opener().open(url, data) - -def pyopenssl_https_get(url): - """ - HTTPS GET-function to use when running old Python < 2.7 - """ - from OpenSSL import SSL - import socket - - # TLSv1 is the best we can get on Python 2.6 - context = SSL.Context(SSL.TLSv1_METHOD) - sock = SSL.Connection(context, socket.socket(socket.AF_INET, socket.SOCK_STREAM)) - - url_without_scheme = url.split('https://')[-1] - host = url_without_scheme.split('/')[0] - path = url_without_scheme.split('/', 1)[1] - http_get_request = ("GET /{path} HTTP/1.1\r\n" - "Host: {host}\r\n" - "\r\n" - ).format(path=path, host=host) - - sock.connect((host, 443)) - sock.write(http_get_request) - response = sock.recv(1024) - response_lines = response.rsplit('\n') - - # We are only interested in the actual response, - # without headers, contained in the last line. - return response_lines[len(response_lines) - 1] +def urlget(url, params=None): + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=requests.packages.urllib3.exceptions.SubjectAltNameWarning) + return requests.get(url, verify=sslparameters.cafile, params=params) + +def urlpost(url, data): + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=requests.packages.urllib3.exceptions.SubjectAltNameWarning) + return requests.post(url, data=data, verify=sslparameters.cafile) def get_sth(baseurl): - result = urlopen(baseurl + "ct/v1/get-sth").read() - return json.loads(result) + result = urlget(baseurl + "ct/v1/get-sth") + result.raise_for_status() + return result.json() def get_proof_by_hash(baseurl, hash, tree_size): - try: - params = urllib.urlencode({"hash":base64.b64encode(hash), - "tree_size":tree_size}) - result = \ - urlopen(baseurl + "ct/v1/get-proof-by-hash?" + params).read() - return json.loads(result) - except urllib2.HTTPError, e: - print "ERROR:", e.read() + params = {"hash":base64.b64encode(hash), + "tree_size":tree_size} + result = \ + urlget(baseurl + "ct/v1/get-proof-by-hash", params=params) + if result.status_code == requests.codes.ok: + return result.json() + else: + print "ERROR:", result.status_code, result.text sys.exit(1) def get_consistency_proof(baseurl, tree_size1, tree_size2): - try: - params = urllib.urlencode({"first":tree_size1, - "second":tree_size2}) - result = \ - urlopen(baseurl + "ct/v1/get-sth-consistency?" + params).read() - return json.loads(result)["consistency"] - except urllib2.HTTPError, e: - print "ERROR:", e.read() + params = {"first":tree_size1, + "second":tree_size2} + result = urlget(baseurl + "ct/v1/get-sth-consistency", params=params) + if result.status_code == requests.codes.ok: + return result.json()["consistency"] + else: + print "ERROR:", result.status_code, result.text sys.exit(1) def tls_array(data, length_len): @@ -179,13 +153,14 @@ def unpack_tls_array(packed_data, length_len): def add_chain(baseurl, submission): try: - result = urlopen(baseurl + "ct/v1/add-chain", json.dumps(submission)).read() - return json.loads(result) - except urllib2.HTTPError, e: - print "ERROR", e.code,":", e.read() - if e.code == 400: - return None - sys.exit(1) + result = urlpost(baseurl + "ct/v1/add-chain", json.dumps(submission)) + if result.status_code == requests.codes.ok: + return result.json() + else: + print "ERROR:", result.status_code, result.text + if result.status_code == 400: + return None + sys.exit(1) except ValueError, e: print "==== FAILED REQUEST ====" print submission @@ -196,14 +171,16 @@ def add_chain(baseurl, submission): def add_prechain(baseurl, submission): try: - result = urlopen(baseurl + "ct/v1/add-pre-chain", - json.dumps(submission)).read() - return json.loads(result) - except urllib2.HTTPError, e: - print "ERROR", e.code,":", e.read() - if e.code == 400: - return None - sys.exit(1) + result = urlpost(baseurl + "ct/v1/add-pre-chain", + json.dumps(submission)) + + if result.status_code == requests.codes.ok: + return result.json() + else: + print "ERROR:", result.status_code, result.text + if result.status_code == 400: + return None + sys.exit(1) except ValueError, e: print "==== FAILED REQUEST ====" print submission @@ -213,12 +190,12 @@ def add_prechain(baseurl, submission): raise e def get_entries(baseurl, start, end): - params = urllib.urlencode({"start":start, "end":end}) - try: - result = urlopen(baseurl + "ct/v1/get-entries?" + params).read() - return json.loads(result) - except urllib2.HTTPError, e: - print "ERROR:", e.read() + params = {"start":start, "end":end} + result = urlget(baseurl + "ct/v1/get-entries", params=params) + if result.status_code == requests.codes.ok: + return result.json() + else: + print "ERROR:", result.status_code, result.text sys.exit(1) def extract_precertificate(precert_chain_entry): @@ -283,27 +260,35 @@ def check_auth_header(authheader, expected_key, publickeydir, data, path): sigdecode=ecdsa.util.sigdecode_der) return True -def http_request(url, data=None, key=None, verifynode=None, publickeydir="."): - opener = get_opener() - - (keyname, keyfile) = key - privatekey = get_eckey_from_file(keyfile) - sk = ecdsa.SigningKey.from_der(privatekey) - parsed_url = urlparse.urlparse(url) - if data == None: - data_to_sign = parsed_url.query - method = "GET" - else: - data_to_sign = data - method = "POST" - signature = sk.sign("%s\0%s\0%s" % (method, parsed_url.path, data_to_sign), hashfunc=hashlib.sha256, - sigencode=ecdsa.util.sigencode_der) - opener.addheaders = [('X-Catlfish-Auth', base64.b64encode(signature) + ";key=" + keyname)] - result = opener.open(url, data) - authheader = result.info().get('X-Catlfish-Auth') - data = result.read() - check_auth_header(authheader, verifynode, publickeydir, data, parsed_url.path) - return data +def http_request(url, data=None, key=None, verifynode=None, publickeydir=".", params=None): + with requests.sessions.Session() as session: + (keyname, keyfile) = key + privatekey = get_eckey_from_file(keyfile) + sk = ecdsa.SigningKey.from_der(privatekey) + if data == None: + method = "GET" + else: + method = "POST" + assert(params == None) + req = requests.Request(method, url, params=params, data=data) + prepared_req = session.prepare_request(req) + parsed_url = urlparse.urlparse(prepared_req.url) + if data == None: + data_to_sign = parsed_url.query + else: + data_to_sign = data + url_to_sign = parsed_url.path + signature = sk.sign("%s\0%s\0%s" % (method, url_to_sign, data_to_sign), hashfunc=hashlib.sha256, + sigencode=ecdsa.util.sigencode_der) + prepared_req.headers['X-Catlfish-Auth'] = base64.b64encode(signature) + ";key=" + keyname + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=requests.packages.urllib3.exceptions.SubjectAltNameWarning) + result = session.send(prepared_req, verify=sslparameters.cafile) + result.raise_for_status() + authheader = result.headers.get('X-Catlfish-Auth') + data = result.text + check_auth_header(authheader, verifynode, publickeydir, data, url_to_sign) + return data def get_signature(baseurl, data, key=None): try: @@ -311,11 +296,8 @@ def get_signature(baseurl, data, key=None): result = http_request(baseurl + "plop/v1/signing/sth", params, key=key) parsed_result = json.loads(result) return base64.b64decode(parsed_result.get(u"result")) - except urllib2.URLError, e: - print >>sys.stderr, "ERROR: get_signature", e.reason - sys.exit(1) - except urllib2.HTTPError, e: - print "ERROR: get_signature", e.read() + except requests.exceptions.HTTPError, e: + print "ERROR: get_signature", e.response raise e def create_signature(baseurl, data, key=None): diff --git a/tools/merge_sth.py b/tools/merge_sth.py index 567082c..cf1e994 100755 --- a/tools/merge_sth.py +++ b/tools/merge_sth.py @@ -8,6 +8,7 @@ import sys import json import urllib2 import time +import requests from base64 import b64encode from mergetools import parse_args, get_nfetched, hexencode, hexdecode, \ get_logorder, get_sth @@ -83,8 +84,8 @@ def merge_sth(args, config, localconfig): "https://%s/" % signingnode["address"], key=own_key) break - except urllib2.URLError, err: - print >>sys.stderr, err + except requests.exceptions.HTTPError, e: + print >>sys.stderr, e.response sys.stderr.flush() if tree_head_signature == None: print >>sys.stderr, "Could not contact any signing nodes" diff --git a/tools/mergetools.py b/tools/mergetools.py index 3dbe517..f6e8bd5 100644 --- a/tools/mergetools.py +++ b/tools/mergetools.py @@ -6,11 +6,10 @@ import base64 import hashlib import sys import struct -import urllib -import urllib2 import json import yaml import argparse +import requests from certtools import get_leaf_hash, http_request, get_leaf_hash def parselogrow(row): @@ -172,18 +171,15 @@ def get_new_entries(node, baseurl, own_key, paths): entry in parsed_result[u"entries"]] print >>sys.stderr, "ERROR: fetchnewentries", parsed_result sys.exit(1) - except urllib2.URLError, e: - print >>sys.stderr, "ERROR: fetchnewentries", e.reason - sys.exit(1) - except urllib2.HTTPError, e: - print >>sys.stderr, "ERROR: fetchnewentries", e.read() + except requests.exceptions.HTTPError, e: + print >>sys.stderr, "ERROR: fetchnewentries", e.response sys.exit(1) def get_entries(node, baseurl, own_key, paths, hashes): try: - params = urllib.urlencode({"hash":[base64.b64encode(ehash) for \ - ehash in hashes]}, doseq=True) - result = http_request(baseurl + "plop/v1/storage/getentry?" + params, + params = {"hash":[base64.b64encode(ehash) for ehash in hashes]} + result = http_request(baseurl + "plop/v1/storage/getentry", + params=params, key=own_key, verifynode=node, publickeydir=paths["publickeys"]) parsed_result = json.loads(result) @@ -196,11 +192,8 @@ def get_entries(node, baseurl, own_key, paths, hashes): return entries print >>sys.stderr, "ERROR: getentry", parsed_result sys.exit(1) - except urllib2.URLError, e: - print >>sys.stderr, "ERROR: getentry", e.reason - sys.exit(1) - except urllib2.HTTPError, e: - print >>sys.stderr, "ERROR: getentry", e.read() + except requests.exceptions.HTTPError, e: + print >>sys.stderr, "ERROR: getentry", e.request.url, e.response sys.exit(1) def get_curpos(node, baseurl, own_key, paths): @@ -213,11 +206,8 @@ def get_curpos(node, baseurl, own_key, paths): return parsed_result[u"position"] print >>sys.stderr, "ERROR: currentposition", parsed_result sys.exit(1) - except urllib2.URLError, e: - print >>sys.stderr, "ERROR: currentposition", e.reason - sys.exit(1) - except urllib2.HTTPError, e: - print >>sys.stderr, "ERROR: currentposition", e.read() + except requests.exceptions.HTTPError, e: + print >>sys.stderr, "ERROR: currentposition", e.response sys.exit(1) def get_verifiedsize(node, baseurl, own_key, paths): @@ -230,11 +220,8 @@ def get_verifiedsize(node, baseurl, own_key, paths): return parsed_result[u"size"] print >>sys.stderr, "ERROR: verifiedsize", parsed_result sys.exit(1) - except urllib2.URLError, e: - print >>sys.stderr, "ERROR: verifiedsize", e.reason - sys.exit(1) - except urllib2.HTTPError, e: - print >>sys.stderr, "ERROR: verifiedsize", e.read() + except requests.exceptions.HTTPError, e: + print >>sys.stderr, "ERROR: verifiedsize", e.response sys.exit(1) @@ -244,11 +231,8 @@ def sendlog(node, baseurl, own_key, paths, submission): json.dumps(submission), key=own_key, verifynode=node, publickeydir=paths["publickeys"]) return json.loads(result) - except urllib2.URLError, e: - print >>sys.stderr, "ERROR: sendlog", e.reason - sys.exit(1) - except urllib2.HTTPError, e: - print >>sys.stderr, "ERROR: sendlog", e.read() + except requests.exceptions.HTTPError, e: + print >>sys.stderr, "ERROR: sendlog", e.response sys.stderr.flush() return None except ValueError, e: @@ -266,11 +250,8 @@ def backup_sendlog(node, baseurl, own_key, paths, submission): json.dumps(submission), key=own_key, verifynode=node, publickeydir=paths["publickeys"]) return json.loads(result) - except urllib2.URLError, e: - print >>sys.stderr, "ERROR: backup_sendlog", e.reason - sys.exit(1) - except urllib2.HTTPError, e: - print >>sys.stderr, "ERROR: backup_sendlog", e.read() + except requests.exceptions.HTTPError, e: + print >>sys.stderr, "ERROR: backup_sendlog", e.response sys.stderr.flush() return None except ValueError, e: @@ -290,11 +271,8 @@ def sendentry(node, baseurl, own_key, paths, entry, ehash): "treeleafhash":base64.b64encode(ehash)}), key=own_key, verifynode=node, publickeydir=paths["publickeys"]) return json.loads(result) - except urllib2.URLError, e: - print >>sys.stderr, "ERROR: sendentry", e.reason - sys.exit(1) - except urllib2.HTTPError, e: - print >>sys.stderr, "ERROR: sendentry", e.read() + except requests.exceptions.HTTPError, e: + print >>sys.stderr, "ERROR: sendentry", e.reponse sys.exit(1) except ValueError, e: print >>sys.stderr, "==== FAILED REQUEST ====" @@ -316,11 +294,8 @@ def sendentries_merge(node, baseurl, own_key, paths, entries): json.dumps(json_entries), key=own_key, verifynode=node, publickeydir=paths["publickeys"]) return json.loads(result) - except urllib2.URLError, e: - print >>sys.stderr, "ERROR: sendentry_merge", e.reason - sys.exit(1) - except urllib2.HTTPError, e: - print >>sys.stderr, "ERROR: sendentry_merge", e.read() + except requests.exceptions.HTTPError, e: + print >>sys.stderr, "ERROR: sendentry_merge", e.response sys.exit(1) except ValueError, e: print >>sys.stderr, "==== FAILED REQUEST ====" @@ -337,8 +312,8 @@ def sendsth(node, baseurl, own_key, paths, submission): json.dumps(submission), key=own_key, verifynode=node, publickeydir=paths["publickeys"]) return json.loads(result) - except urllib2.HTTPError, e: - print >>sys.stderr, "ERROR: sendsth", e.read() + except requests.exceptions.HTTPError, e: + print >>sys.stderr, "ERROR: sendsth", e.response sys.exit(1) except ValueError, e: print >>sys.stderr, "==== FAILED REQUEST ====" @@ -355,11 +330,8 @@ def verifyroot(node, baseurl, own_key, paths, treesize): json.dumps({"tree_size":treesize}), key=own_key, verifynode=node, publickeydir=paths["publickeys"]) return json.loads(result) - except urllib2.URLError, e: - print >>sys.stderr, "ERROR: verifyroot", e.reason - sys.exit(1) - except urllib2.HTTPError, e: - print >>sys.stderr, "ERROR: verifyroot", e.read() + except requests.exceptions.HTTPError, e: + print >>sys.stderr, "ERROR: verifyroot", e.response sys.exit(1) except ValueError, e: print >>sys.stderr, "==== FAILED REQUEST ====" @@ -376,11 +348,8 @@ def setverifiedsize(node, baseurl, own_key, paths, treesize): json.dumps({"size":treesize}), key=own_key, verifynode=node, publickeydir=paths["publickeys"]) return json.loads(result) - except urllib2.URLError, e: - print >>sys.stderr, "ERROR: setverifiedsize", e.reason - sys.exit(1) - except urllib2.HTTPError, e: - print >>sys.stderr, "ERROR: setverifiedsize", e.read() + except requests.exceptions.HTTPError, e: + print >>sys.stderr, "ERROR: setverifiedsize", e.response sys.exit(1) except ValueError, e: print >>sys.stderr, "==== FAILED REQUEST ====" @@ -401,11 +370,8 @@ def get_missingentries(node, baseurl, own_key, paths): return parsed_result[u"entries"] print >>sys.stderr, "ERROR: missingentries", parsed_result sys.exit(1) - except urllib2.URLError, e: - print >>sys.stderr, "ERROR: missingentries", e.reason - sys.exit(1) - except urllib2.HTTPError, e: - print >>sys.stderr, "ERROR: missingentries", e.read() + except requests.exceptions.HTTPError, e: + print >>sys.stderr, "ERROR: missingentries", e.response sys.exit(1) def get_missingentriesforbackup(node, baseurl, own_key, paths): @@ -418,11 +384,8 @@ def get_missingentriesforbackup(node, baseurl, own_key, paths): return parsed_result[u"entries"] print >>sys.stderr, "ERROR: missingentriesforbackup", parsed_result sys.exit(1) - except urllib2.URLError, e: - print >>sys.stderr, "ERROR: missingentriesforbackup", e.reason - sys.exit(1) - except urllib2.HTTPError, e: - print >>sys.stderr, "ERROR: missingentriesforbackup", e.read() + except requests.exceptions.HTTPError, e: + print >>sys.stderr, "ERROR: missingentriesforbackup", e.response sys.exit(1) def chunks(l, n): -- cgit v1.1 From d9445aedc64d53192a96a86bd624bb7a2e31208b Mon Sep 17 00:00:00 2001 From: Magnus Ahltorp Date: Wed, 9 Mar 2016 07:47:52 +0100 Subject: Make it work with older python-requests --- tools/certtools.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/tools/certtools.py b/tools/certtools.py index 919460e..69e376d 100644 --- a/tools/certtools.py +++ b/tools/certtools.py @@ -102,12 +102,18 @@ def create_ssl_context(cafile=None): def urlget(url, params=None): with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=requests.packages.urllib3.exceptions.SubjectAltNameWarning) + try: + warnings.filterwarnings("ignore", category=requests.packages.urllib3.exceptions.SubjectAltNameWarning) + except AttributeError: + pass return requests.get(url, verify=sslparameters.cafile, params=params) def urlpost(url, data): with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=requests.packages.urllib3.exceptions.SubjectAltNameWarning) + try: + warnings.filterwarnings("ignore", category=requests.packages.urllib3.exceptions.SubjectAltNameWarning) + except AttributeError: + pass return requests.post(url, data=data, verify=sslparameters.cafile) def get_sth(baseurl): @@ -282,7 +288,10 @@ def http_request(url, data=None, key=None, verifynode=None, publickeydir=".", pa sigencode=ecdsa.util.sigencode_der) prepared_req.headers['X-Catlfish-Auth'] = base64.b64encode(signature) + ";key=" + keyname with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=requests.packages.urllib3.exceptions.SubjectAltNameWarning) + try: + warnings.filterwarnings("ignore", category=requests.packages.urllib3.exceptions.SubjectAltNameWarning) + except AttributeError: + pass result = session.send(prepared_req, verify=sslparameters.cafile) result.raise_for_status() authheader = result.headers.get('X-Catlfish-Auth') -- cgit v1.1 From 3629b8bd60f9d14828bcd6a0feb3f948c95351f0 Mon Sep 17 00:00:00 2001 From: Magnus Ahltorp Date: Tue, 15 Mar 2016 12:46:49 +0100 Subject: Make http_request take optional session parameter --- tools/certtools.py | 70 +++++++++++++++++++++++++++++------------------------- 1 file changed, 38 insertions(+), 32 deletions(-) diff --git a/tools/certtools.py b/tools/certtools.py index 69e376d..023bc1e 100644 --- a/tools/certtools.py +++ b/tools/certtools.py @@ -266,38 +266,44 @@ def check_auth_header(authheader, expected_key, publickeydir, data, path): sigdecode=ecdsa.util.sigdecode_der) return True -def http_request(url, data=None, key=None, verifynode=None, publickeydir=".", params=None): - with requests.sessions.Session() as session: - (keyname, keyfile) = key - privatekey = get_eckey_from_file(keyfile) - sk = ecdsa.SigningKey.from_der(privatekey) - if data == None: - method = "GET" - else: - method = "POST" - assert(params == None) - req = requests.Request(method, url, params=params, data=data) - prepared_req = session.prepare_request(req) - parsed_url = urlparse.urlparse(prepared_req.url) - if data == None: - data_to_sign = parsed_url.query - else: - data_to_sign = data - url_to_sign = parsed_url.path - signature = sk.sign("%s\0%s\0%s" % (method, url_to_sign, data_to_sign), hashfunc=hashlib.sha256, - sigencode=ecdsa.util.sigencode_der) - prepared_req.headers['X-Catlfish-Auth'] = base64.b64encode(signature) + ";key=" + keyname - with warnings.catch_warnings(): - try: - warnings.filterwarnings("ignore", category=requests.packages.urllib3.exceptions.SubjectAltNameWarning) - except AttributeError: - pass - result = session.send(prepared_req, verify=sslparameters.cafile) - result.raise_for_status() - authheader = result.headers.get('X-Catlfish-Auth') - data = result.text - check_auth_header(authheader, verifynode, publickeydir, data, url_to_sign) - return data +def http_request(url, data=None, key=None, verifynode=None, publickeydir=".", params=None, session=None): + if session: + return http_request_session(url, data=data, key=key, verifynode=verifynode, publickeydir=publickeydir, params=params, session=session) + else: + with requests.sessions.Session() as session: + return http_request_session(url, data=data, key=key, verifynode=verifynode, publickeydir=publickeydir, params=params, session=session) + +def http_request_session(url, data=None, key=None, verifynode=None, publickeydir=".", params=None, session=None): + (keyname, keyfile) = key + privatekey = get_eckey_from_file(keyfile) + sk = ecdsa.SigningKey.from_der(privatekey) + if data == None: + method = "GET" + else: + method = "POST" + assert(params == None) + req = requests.Request(method, url, params=params, data=data) + prepared_req = session.prepare_request(req) + parsed_url = urlparse.urlparse(prepared_req.url) + if data == None: + data_to_sign = parsed_url.query + else: + data_to_sign = data + url_to_sign = parsed_url.path + signature = sk.sign("%s\0%s\0%s" % (method, url_to_sign, data_to_sign), hashfunc=hashlib.sha256, + sigencode=ecdsa.util.sigencode_der) + prepared_req.headers['X-Catlfish-Auth'] = base64.b64encode(signature) + ";key=" + keyname + with warnings.catch_warnings(): + try: + warnings.filterwarnings("ignore", category=requests.packages.urllib3.exceptions.SubjectAltNameWarning) + except AttributeError: + pass + result = session.send(prepared_req, verify=sslparameters.cafile) + result.raise_for_status() + authheader = result.headers.get('X-Catlfish-Auth') + data = result.text + check_auth_header(authheader, verifynode, publickeydir, data, url_to_sign) + return data def get_signature(baseurl, data, key=None): try: -- cgit v1.1 From 06c6290ac4f0507374dfbf703e6577dfe48dfae7 Mon Sep 17 00:00:00 2001 From: Magnus Ahltorp Date: Tue, 15 Mar 2016 12:52:51 +0100 Subject: Do detection of where log ends before sending new hashes Try to send entries until missing entries is empty --- tools/merge_backup.py | 100 +++++++++++++++++++++++++++++++++----------------- tools/mergetools.py | 5 ++- 2 files changed, 70 insertions(+), 35 deletions(-) diff --git a/tools/merge_backup.py b/tools/merge_backup.py index 123347a..0c283e5 100755 --- a/tools/merge_backup.py +++ b/tools/merge_backup.py @@ -7,6 +7,7 @@ import sys import base64 import select +import requests from time import sleep from certtools import timing_point, build_merkle_tree, write_file, \ create_ssl_context @@ -15,6 +16,21 @@ from mergetools import chunks, backup_sendlog, get_logorder, \ hexencode, setverifiedsize, sendentries_merge, verifyroot, \ get_nfetched, parse_args +def backup_loop(nodename, nodeaddress, own_key, paths, verifiedsize, chunk): + for trynumber in range(5, 0, -1): + sendlogresult = \ + backup_sendlog(nodename, nodeaddress, own_key, paths, + {"start": verifiedsize, "hashes": chunk}) + if sendlogresult == None: + if trynumber == 1: + return None + select.select([], [], [], 10.0) + print >>sys.stderr, "tries left:", trynumber + sys.stderr.flush() + continue + return sendlogresult + + def merge_backup(args, config, localconfig, secondaries): paths = localconfig["paths"] own_key = (localconfig["nodename"], @@ -27,6 +43,7 @@ def merge_backup(args, config, localconfig, secondaries): timing = timing_point() nfetched = get_nfetched(currentsizefile, logorderfile) + timing_point(timing, "get nfetched") logorder = get_logorder(logorderfile, nfetched) tree_size = len(logorder) timing_point(timing, "get logorder") @@ -49,21 +66,34 @@ def merge_backup(args, config, localconfig, secondaries): sys.stderr.flush() entries = [base64.b64encode(entry) for entry in logorder[verifiedsize:]] + + print >>sys.stderr, "determining end of log:", + for chunk in chunks(entries, 100000): + sendlogresult = backup_loop(nodename, nodeaddress, own_key, paths, verifiedsize, chunk[:10]) + if sendlogresult == None: + print >>sys.stderr, "sendlog result was None" + sys.exit(1) + if sendlogresult["result"] != "ok": + print >>sys.stderr, "backup_sendlog:", sendlogresult + sys.exit(1) + verifiedsize += len(chunk) + print >>sys.stderr, verifiedsize, + sys.stderr.flush() + + if verifiedsize > 100000: + verifiedsize -= 100000 + else: + verifiedsize = 0 + + timing_point(timing, "checklog") + + entries = [base64.b64encode(entry) for entry in logorder[verifiedsize:]] print >>sys.stderr, "sending log:", sys.stderr.flush() for chunk in chunks(entries, 1000): - for trynumber in range(5, 0, -1): - sendlogresult = \ - backup_sendlog(nodename, nodeaddress, own_key, paths, - {"start": verifiedsize, "hashes": chunk}) - if sendlogresult == None: - if trynumber == 1: - sys.exit(1) - select.select([], [], [], 10.0) - print >>sys.stderr, "tries left:", trynumber - sys.stderr.flush() - continue - break + sendlogresult = backup_loop(nodename, nodeaddress, own_key, paths, verifiedsize, chunk) + if sendlogresult == None: + sys.exit(1) if sendlogresult["result"] != "ok": print >>sys.stderr, "backup_sendlog:", sendlogresult sys.exit(1) @@ -78,28 +108,32 @@ def merge_backup(args, config, localconfig, secondaries): missingentries = get_missingentriesforbackup(nodename, nodeaddress, own_key, paths) timing_point(timing, "get missing") - print >>sys.stderr, "missing entries:", len(missingentries) - sys.stderr.flush() - fetched_entries = 0 - print >>sys.stderr, "fetching missing entries", - sys.stderr.flush() - for missingentry_chunk in chunks(missingentries, 100): - missingentry_hashes = [base64.b64decode(missingentry) for missingentry in missingentry_chunk] - hashes_and_entries = [(hash, read_chain(chainsdir, hash)) for hash in missingentry_hashes] - sendentryresult = sendentries_merge(nodename, nodeaddress, - own_key, paths, - hashes_and_entries) - if sendentryresult["result"] != "ok": - print >>sys.stderr, "sendentry_merge:", sendentryresult - sys.exit(1) - fetched_entries += 1 - if fetched_entries % 1000 == 0: - print >>sys.stderr, fetched_entries, - sys.stderr.flush() - print >>sys.stderr - sys.stderr.flush() - timing_point(timing, "send missing") + while missingentries: + print >>sys.stderr, "missing entries:", len(missingentries) + sys.stderr.flush() + + print >>sys.stderr, "fetching missing entries", + sys.stderr.flush() + with requests.sessions.Session() as session: + for missingentry_chunk in chunks(missingentries, 100): + missingentry_hashes = [base64.b64decode(missingentry) for missingentry in missingentry_chunk] + hashes_and_entries = [(hash, read_chain(chainsdir, hash)) for hash in missingentry_hashes] + sendentryresult = sendentries_merge(nodename, nodeaddress, + own_key, paths, + hashes_and_entries, session) + if sendentryresult["result"] != "ok": + print >>sys.stderr, "sendentry_merge:", sendentryresult + sys.exit(1) + print >>sys.stderr, fetched_entries, + sys.stderr.flush() + print >>sys.stderr + sys.stderr.flush() + timing_point(timing, "send missing") + + missingentries = get_missingentriesforbackup(nodename, nodeaddress, + own_key, paths) + timing_point(timing, "get missing") verifyrootresult = verifyroot(nodename, nodeaddress, own_key, paths, tree_size) diff --git a/tools/mergetools.py b/tools/mergetools.py index f6e8bd5..ec4fd2a 100644 --- a/tools/mergetools.py +++ b/tools/mergetools.py @@ -286,13 +286,14 @@ def sendentry(node, baseurl, own_key, paths, entry, ehash): def sendentry_merge(node, baseurl, own_key, paths, entry, ehash): return sendentries_merge(node, baseurl, own_key, paths, [(ehash, entry)]) -def sendentries_merge(node, baseurl, own_key, paths, entries): +def sendentries_merge(node, baseurl, own_key, paths, entries, session=None): try: json_entries = [{"entry":base64.b64encode(entry), "treeleafhash":base64.b64encode(hash)} for hash, entry in entries] result = http_request( baseurl + "plop/v1/merge/sendentry", json.dumps(json_entries), - key=own_key, verifynode=node, publickeydir=paths["publickeys"]) + key=own_key, verifynode=node, publickeydir=paths["publickeys"], + session=session) return json.loads(result) except requests.exceptions.HTTPError, e: print >>sys.stderr, "ERROR: sendentry_merge", e.response -- cgit v1.1 From b1d0d1bf04c7cb49d1e80eaac73886cf4ad5b1dc Mon Sep 17 00:00:00 2001 From: Magnus Ahltorp Date: Tue, 15 Mar 2016 13:55:51 +0100 Subject: bugfix: re-added fetched_entries --- tools/merge_backup.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/merge_backup.py b/tools/merge_backup.py index 0c283e5..48ac133 100755 --- a/tools/merge_backup.py +++ b/tools/merge_backup.py @@ -113,6 +113,7 @@ def merge_backup(args, config, localconfig, secondaries): print >>sys.stderr, "missing entries:", len(missingentries) sys.stderr.flush() + fetched_entries = 0 print >>sys.stderr, "fetching missing entries", sys.stderr.flush() with requests.sessions.Session() as session: @@ -125,6 +126,7 @@ def merge_backup(args, config, localconfig, secondaries): if sendentryresult["result"] != "ok": print >>sys.stderr, "sendentry_merge:", sendentryresult sys.exit(1) + fetched_entries += len(missingentry_hashes) print >>sys.stderr, fetched_entries, sys.stderr.flush() print >>sys.stderr -- cgit v1.1 From b97c5ca89ed619c1463bd9932d77f34ec59e7171 Mon Sep 17 00:00:00 2001 From: Magnus Ahltorp Date: Thu, 17 Mar 2016 16:59:54 +0100 Subject: Limit HTTP chunk size to 900000 bytes --- tools/certtools.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/certtools.py b/tools/certtools.py index 023bc1e..1523c97 100644 --- a/tools/certtools.py +++ b/tools/certtools.py @@ -273,16 +273,23 @@ def http_request(url, data=None, key=None, verifynode=None, publickeydir=".", pa with requests.sessions.Session() as session: return http_request_session(url, data=data, key=key, verifynode=verifynode, publickeydir=publickeydir, params=params, session=session) +def chunk_generator(data, maxsize): + while len(data): + yield data[:maxsize] + data = data[maxsize:] + def http_request_session(url, data=None, key=None, verifynode=None, publickeydir=".", params=None, session=None): (keyname, keyfile) = key privatekey = get_eckey_from_file(keyfile) sk = ecdsa.SigningKey.from_der(privatekey) if data == None: method = "GET" + chunked_data = None else: method = "POST" + chunked_data = chunk_generator(data, 900000) assert(params == None) - req = requests.Request(method, url, params=params, data=data) + req = requests.Request(method, url, params=params, data=chunked_data) prepared_req = session.prepare_request(req) parsed_url = urlparse.urlparse(prepared_req.url) if data == None: -- cgit v1.1 From 9a8f8bd8da784622fc340c8466435551a2d2d268 Mon Sep 17 00:00:00 2001 From: Magnus Ahltorp Date: Fri, 18 Mar 2016 16:04:40 +0100 Subject: Added description of current merge implementation --- doc/merge.txt | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/doc/merge.txt b/doc/merge.txt index 28757a7..b2e2738 100644 --- a/doc/merge.txt +++ b/doc/merge.txt @@ -20,6 +20,66 @@ The merge process - merge-dist distributes 'sth' and missing entries to frontend nodes. +Merge distribution (merge_dist) +----------------------------------------------------- + + * get current position from frontend server (curpos) + + * send log + * sends log in chunks of 1000 hashes from curpos + + * get missing entries + * server goes through all hashes from curpos and checks if they are + present + * when the server has collected 100000 non-present entries, it + returns them + * server also keep a separate (in-memory) counter that caches the + index of the first entry that either hasn't been checked if it is + present or not, or that is checked and found to be non-present, + to allow the server to start from that position + + * send entries + * send these entries one at a time + * does not get more missing entries when it is done + + * send sth + * sends the previously (merge-sth) constructed sth to the server, + which verifies all entries and adds entry-to-hash and + hash-to-index + * saves the last verified position continuously to avoid doing the + work again if the verification is aborted and restarted + +Merge backup (merge_backup) +----------------------------------------------------- + + * get verifiedsize from backup server + + * send log: + * determines the end of the log by trying to send small chunks of + the log hashes from verifiedsize until it fails, then restarts + with the normal chunk size (1000) + + * get missing entries + * this stage is the same as for merge_dist + + * send entries + * send these entries in chunks of 100 at a time (this is limited + because of memory considerations and web server limits) + * when it is done, goes back to the "get missing entries" stage, + until there are no more missing entries + + * verifyroot + * server verifies all entries from verifiedsize, and then + calculates and returns root hash + * unlike merge distribution, does not save the last verified + position either continuously or when it is finished, which means + that it then has to verify all entries again if it is aborted and + restarted before verifiedsize is set to the new value + + * if merge_backup sees that the root hash is correct, it sets + verifiedsize on backup server + + TODO ==== -- cgit v1.1 From bbf254d6d7f1708503f425c0eb8926af1b715b9c Mon Sep 17 00:00:00 2001 From: Magnus Ahltorp Date: Fri, 18 Mar 2016 16:05:24 +0100 Subject: Added commented out permdb option for merge secondary test --- test/catlfish-test-local-merge-2.cfg | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/catlfish-test-local-merge-2.cfg b/test/catlfish-test-local-merge-2.cfg index 28c4eda..7096619 100644 --- a/test/catlfish-test-local-merge-2.cfg +++ b/test/catlfish-test-local-merge-2.cfg @@ -6,6 +6,8 @@ addresses: nodename: merge-2 +#dbbackend: permdb + paths: configdir: . knownroots: tests/known_roots -- cgit v1.1