Pull search algorithm out into another module

pull/19465/head
Richard Belleville 6 years ago
parent 42b2fe154a
commit 7fa7f932e3
  1. 17
      examples/python/cancellation/BUILD.bazel
  2. 158
      examples/python/cancellation/search.py
  3. 151
      examples/python/cancellation/server.py

@ -19,12 +19,14 @@ load("//bazel:python_rules.bzl", "py_proto_library")
proto_library( proto_library(
name = "hash_name_proto", name = "hash_name_proto",
srcs = ["hash_name.proto"] srcs = ["hash_name.proto"],
testonly = 1,
) )
py_proto_library( py_proto_library(
name = "hash_name_proto_pb2", name = "hash_name_proto_pb2",
deps = [":hash_name_proto"], deps = [":hash_name_proto"],
testonly = 1,
well_known_protos = False, well_known_protos = False,
) )
@ -39,13 +41,24 @@ py_binary(
srcs_version = "PY2AND3", srcs_version = "PY2AND3",
) )
py_library(
name = "search",
srcs = ["search.py"],
srcs_version = "PY2AND3",
deps = [
":hash_name_proto_pb2",
],
testonly = 1,
)
py_binary( py_binary(
name = "server", name = "server",
testonly = 1, testonly = 1,
srcs = ["server.py"], srcs = ["server.py"],
deps = [ deps = [
"//src/python/grpcio/grpc:grpcio", "//src/python/grpcio/grpc:grpcio",
":hash_name_proto_pb2" ":hash_name_proto_pb2",
":search",
] + select({ ] + select({
"//conditions:default": [requirement("futures")], "//conditions:default": [requirement("futures")],
"//:python3": [], "//:python3": [],

@ -0,0 +1,158 @@
# Copyright 2019 the gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A search algorithm over the space of all bytestrings."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import base64
import hashlib
import logging
import struct
from examples.python.cancellation import hash_name_pb2
_LOGGER = logging.getLogger(__name__)
_BYTE_MAX = 255
def _get_hamming_distance(a, b):
"""Calculates hamming distance between strings of equal length."""
distance = 0
for char_a, char_b in zip(a, b):
if char_a.lower() != char_b.lower():
distance += 1
return distance
def _get_substring_hamming_distance(candidate, target):
    """Calculates the minimum Hamming distance between the target
    and any substring of the candidate.

    Args:
      candidate: The string whose substrings will be tested.
      target: The target string.

    Returns:
      The minimum Hamming distance between candidate and target, or None
        when the candidate is shorter than the target (no window of
        len(target) exists).
    """
    target_length = len(target)  # hoisted: invariant across the loop
    min_distance = None
    # Slide a window of len(target) characters across the candidate and
    # keep the best (smallest) distance seen.
    for i in range(len(candidate) - target_length + 1):
        distance = _get_hamming_distance(candidate[i:i + target_length],
                                         target)
        if min_distance is None or distance < min_distance:
            min_distance = distance
    return min_distance
def _get_hash(secret):
hasher = hashlib.sha1()
hasher.update(secret)
return base64.b64encode(hasher.digest()).decode('ascii')
# Raised by the search generator once `maximum_hashes` candidate hashes have
# been computed without finding an ideal match.
class ResourceLimitExceededError(Exception):
    """Signifies the request has exceeded configured limits."""
def _bytestrings_of_length(length):
    """Generates a stream containing all bytestrings of a given length.

    Args:
      length: A positive integer length.

    Yields:
      All bytestrings of length `length`, in ascending order.
    """
    # `digits` acts as a base-256 odometer over the bytes of the string.
    digits = [0] * length
    while True:
        yield b''.join(struct.pack('B', i) for i in digits)
        digits[-1] += 1
        i = length - 1
        # Propagate carries leftward after incrementing the last byte.
        while digits[i] == _BYTE_MAX + 1:
            digits[i] = 0
            i -= 1
            if i == -1:
                # We've run out of strings of `length` bytes. A bare return
                # ends the generator; the original `raise StopIteration()`
                # becomes a RuntimeError under PEP 479 (Python 3.7+).
                return
            else:
                digits[i] += 1
def _all_bytestrings():
    """Generates a stream containing all possible bytestrings.

    This generator does not terminate.

    Yields:
      All bytestrings in ascending order of length.
    """
    current_length = 1
    while True:
        for candidate in _bytestrings_of_length(current_length):
            yield candidate
        current_length += 1
def search(target,
           ideal_distance,
           stop_event,
           maximum_hashes,
           interesting_hamming_distance=None):
    """Find candidate strings.

    Search through the space of all bytestrings, in order of increasing
    length, indefinitely, until a hash with a Hamming distance of
    `ideal_distance` or less has been found.

    Args:
      target: The search string.
      ideal_distance: The desired Hamming distance.
      stop_event: An event indicating whether the RPC should terminate.
      maximum_hashes: The maximum number of hashes to check before stopping.
      interesting_hamming_distance: If specified, strings with a Hamming
        distance from the target below this value will be yielded.

    Yields:
      Instances of HashNameResponse. The final entry in the stream will be of
        `ideal_distance` Hamming distance or less from the target string,
        while all others will be of less than `interesting_hamming_distance`.

    Raises:
      ResourceLimitExceededError: If the computation exceeds `maximum_hashes`
        iterations.
    """
    hashes_computed = 0
    for secret in _all_bytestrings():
        if stop_event.is_set():
            # The caller requested cancellation: end the stream. A bare
            # return is required; `raise StopIteration()` inside a generator
            # is a RuntimeError under PEP 479 (Python 3.7+).
            return
        candidate_hash = _get_hash(secret)
        distance = _get_substring_hamming_distance(candidate_hash, target)
        if interesting_hamming_distance is not None and distance <= interesting_hamming_distance:
            # Surface interesting candidates, but don't stop.
            yield hash_name_pb2.HashNameResponse(
                secret=base64.b64encode(secret),
                hashed_name=candidate_hash,
                hamming_distance=distance)
        elif distance <= ideal_distance:
            # Yield the ideal candidate and end the stream (see PEP 479
            # note above for why this is a return, not StopIteration).
            yield hash_name_pb2.HashNameResponse(
                secret=base64.b64encode(secret),
                hashed_name=candidate_hash,
                hamming_distance=distance)
            return
        hashes_computed += 1
        if hashes_computed == maximum_hashes:
            raise ResourceLimitExceededError()

@ -19,21 +19,17 @@ from __future__ import print_function
from concurrent import futures from concurrent import futures
import argparse import argparse
import base64
import contextlib import contextlib
import logging import logging
import hashlib
import struct
import time import time
import threading import threading
import grpc import grpc
import search
from examples.python.cancellation import hash_name_pb2 from examples.python.cancellation import hash_name_pb2
from examples.python.cancellation import hash_name_pb2_grpc from examples.python.cancellation import hash_name_pb2_grpc
_BYTE_MAX = 255
_LOGGER = logging.getLogger(__name__) _LOGGER = logging.getLogger(__name__)
_SERVER_HOST = 'localhost' _SERVER_HOST = 'localhost'
_ONE_DAY_IN_SECONDS = 60 * 60 * 24 _ONE_DAY_IN_SECONDS = 60 * 60 * 24
@ -41,139 +37,6 @@ _ONE_DAY_IN_SECONDS = 60 * 60 * 24
_DESCRIPTION = "A server for finding hashes similar to names." _DESCRIPTION = "A server for finding hashes similar to names."
def _get_hamming_distance(a, b):
"""Calculates hamming distance between strings of equal length."""
assert len(a) == len(b), "'{}', '{}'".format(a, b)
distance = 0
for char_a, char_b in zip(a, b):
if char_a.lower() != char_b.lower():
distance += 1
return distance
def _get_substring_hamming_distance(candidate, target):
    """Calculates the minimum Hamming distance between the target
    and any substring of the candidate.

    Args:
      candidate: The string whose substrings will be tested. Must be
        non-empty and at least as long as the target.
      target: The target string.

    Returns:
      The minimum Hamming distance between candidate and target.
    """
    assert len(target) <= len(candidate)
    assert candidate
    target_length = len(target)  # hoisted: invariant across the loop
    min_distance = None
    # Slide a window of len(target) characters across the candidate and
    # keep the best (smallest) distance seen.
    for i in range(len(candidate) - target_length + 1):
        distance = _get_hamming_distance(candidate[i:i + target_length],
                                         target)
        if min_distance is None or distance < min_distance:
            min_distance = distance
    return min_distance
def _get_hash(secret):
hasher = hashlib.sha1()
hasher.update(secret)
return base64.b64encode(hasher.digest()).decode('ascii')
# Raised by `_find_secret` once `maximum_hashes` candidate hashes have been
# computed without finding an ideal match.
class ResourceLimitExceededError(Exception):
    """Signifies the request has exceeded configured limits."""
def _bytestrings_of_length(length):
    """Generates a stream containing all bytestrings of a given length.

    Args:
      length: A positive integer length.

    Yields:
      All bytestrings of length `length`, in ascending order.
    """
    # `digits` acts as a base-256 odometer over the bytes of the string.
    digits = [0] * length
    while True:
        yield b''.join(struct.pack('B', i) for i in digits)
        digits[-1] += 1
        i = length - 1
        # Propagate carries leftward after incrementing the last byte.
        while digits[i] == _BYTE_MAX + 1:
            digits[i] = 0
            i -= 1
            if i == -1:
                # We've run out of strings of `length` bytes. A bare return
                # ends the generator; the original `raise StopIteration()`
                # becomes a RuntimeError under PEP 479 (Python 3.7+).
                return
            else:
                digits[i] += 1
def _all_bytestrings():
    """Generates a stream containing all possible bytestrings.

    This generator does not terminate.

    Yields:
      All bytestrings in ascending order of length.
    """
    current_length = 1
    while True:
        for candidate in _bytestrings_of_length(current_length):
            yield candidate
        current_length += 1
def _find_secret(target,
                 ideal_distance,
                 stop_event,
                 maximum_hashes,
                 interesting_hamming_distance=None):
    """Find candidate strings.

    Search through the space of all bytestrings, in order of increasing
    length, indefinitely, until a hash with a Hamming distance of
    `ideal_distance` or less has been found.

    Args:
      target: The search string.
      ideal_distance: The desired Hamming distance.
      stop_event: An event indicating whether the RPC should terminate.
      maximum_hashes: The maximum number of hashes to check before stopping.
      interesting_hamming_distance: If specified, strings with a Hamming
        distance from the target below this value will be yielded.

    Yields:
      Instances of HashNameResponse. The final entry in the stream will be of
        `ideal_distance` Hamming distance or less from the target string,
        while all others will be of less than `interesting_hamming_distance`.

    Raises:
      ResourceLimitExceededError: If the computation exceeds `maximum_hashes`
        iterations.
    """
    hashes_computed = 0
    for secret in _all_bytestrings():
        if stop_event.is_set():
            # The caller requested cancellation: end the stream. A bare
            # return is required; `raise StopIteration()` inside a generator
            # is a RuntimeError under PEP 479 (Python 3.7+).
            return
        candidate_hash = _get_hash(secret)
        distance = _get_substring_hamming_distance(candidate_hash, target)
        if interesting_hamming_distance is not None and distance <= interesting_hamming_distance:
            # Surface interesting candidates, but don't stop.
            yield hash_name_pb2.HashNameResponse(
                secret=base64.b64encode(secret),
                hashed_name=candidate_hash,
                hamming_distance=distance)
        elif distance <= ideal_distance:
            # Yield the ideal candidate and end the stream (see PEP 479
            # note above for why this is a return, not StopIteration).
            yield hash_name_pb2.HashNameResponse(
                secret=base64.b64encode(secret),
                hashed_name=candidate_hash,
                hamming_distance=distance)
            return
        hashes_computed += 1
        if hashes_computed == maximum_hashes:
            raise ResourceLimitExceededError()
class HashFinder(hash_name_pb2_grpc.HashFinderServicer): class HashFinder(hash_name_pb2_grpc.HashFinderServicer):
def __init__(self, maximum_hashes): def __init__(self, maximum_hashes):
@ -191,10 +54,10 @@ class HashFinder(hash_name_pb2_grpc.HashFinderServicer):
candidates = [] candidates = []
try: try:
candidates = list( candidates = list(
_find_secret(request.desired_name, search.search(request.desired_name,
request.ideal_hamming_distance, stop_event, request.ideal_hamming_distance, stop_event,
self._maximum_hashes)) self._maximum_hashes))
except ResourceLimitExceededError: except search.ResourceLimitExceededError:
_LOGGER.info("Cancelling RPC due to exhausted resources.") _LOGGER.info("Cancelling RPC due to exhausted resources.")
context.cancel() context.cancel()
_LOGGER.debug("Servicer thread returning.") _LOGGER.debug("Servicer thread returning.")
@ -210,7 +73,7 @@ class HashFinder(hash_name_pb2_grpc.HashFinderServicer):
stop_event.set() stop_event.set()
context.add_callback(on_rpc_done) context.add_callback(on_rpc_done)
secret_generator = _find_secret( secret_generator = search.search(
request.desired_name, request.desired_name,
request.ideal_hamming_distance, request.ideal_hamming_distance,
stop_event, stop_event,
@ -219,7 +82,7 @@ class HashFinder(hash_name_pb2_grpc.HashFinderServicer):
try: try:
for candidate in secret_generator: for candidate in secret_generator:
yield candidate yield candidate
except ResourceLimitExceededError: except search.ResourceLimitExceededError:
_LOGGER.info("Cancelling RPC due to exhausted resources.") _LOGGER.info("Cancelling RPC due to exhausted resources.")
context.cancel() context.cancel()
_LOGGER.debug("Regained servicer thread.") _LOGGER.debug("Regained servicer thread.")

Loading…
Cancel
Save