summary | refs | log | tree | commit | diff
path: root/tools
diff options
context:
space:
mode:
author Linus Nordberg <linus@nordu.net> 2016-11-26 22:43:13 +0100
committer Linus Nordberg <linus@nordu.net> 2016-11-26 22:43:13 +0100
commit 8b96f7c91591ac0c667538a9c9316f28891ccc72 (patch)
tree 9fe8f6376fd4e4b391946edc78dffb5dde7a3f4b /tools
parent af5ada8e45b992093947b8c370217ddac7165367 (diff)
Add back chunking for get_entries in parallelised merge_fetch.
Even if it's not important for parallelism on the merge node, it's not a bad idea to keep the requests from growing.
Diffstat (limited to 'tools')
-rwxr-xr-x tools/merge_fetch.py 19
1 files changed, 9 insertions, 10 deletions
diff --git a/tools/merge_fetch.py b/tools/merge_fetch.py
index 10fd406..be9edce 100755
--- a/tools/merge_fetch.py
+++ b/tools/merge_fetch.py
@@ -127,17 +127,16 @@ def merge_fetch_worker(args, localconfig, storagenode, pipe):
if to_fetch:
logging.info("%s: fetching %d entries", storagenode["name"],
len(to_fetch))
- fetchlist = list(to_fetch)
with requests.sessions.Session() as session:
- entries = get_entries(storagenode["name"],
- "https://%s/" % storagenode["address"],
- own_key, paths, fetchlist,
- session=session)
- for ehash in fetchlist:
- entry = entries[ehash]
- verify_entry(verifycert, entry, ehash)
- pipe.send(('FETCHED', ehash, entry))
- to_fetch.remove(ehash)
+ for chunk in chunks(list(to_fetch), 100):
+ entries = get_entries(storagenode["name"],
+ "https://%s/" % storagenode["address"],
+ own_key, paths, chunk, session=session)
+ for ehash in chunk:
+ entry = entries[ehash]
+ verify_entry(verifycert, entry, ehash)
+ pipe.send(('FETCHED', ehash, entry))
+ to_fetch.remove(ehash)
new_entries = get_new_entries(storagenode["name"],
"https://%s/" % storagenode["address"],