@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import datetime
import functools
import json
import logging
@ -22,11 +23,14 @@ from typing import List, Optional, Tuple
from kubernetes import client
from kubernetes import utils
import kubernetes . config
# TODO(sergiitk): replace with tenacity
import retrying
import yaml
from framework . helpers import retryers
import framework . helpers . highlighter
# Module-level logger named after this module.
logger = logging . getLogger ( __name__ )
# Type aliases
# Short local names for the YAML highlighter helper and for the Kubernetes
# client model classes that this module uses in annotations and isinstance
# checks.
_HighlighterYaml = framework . helpers . highlighter . HighlighterYaml
V1Deployment = client . V1Deployment
V1ServiceAccount = client . V1ServiceAccount
V1Pod = client . V1Pod
@ -181,6 +185,7 @@ class KubernetesNamespace: # pylint: disable=too-many-public-methods
WAIT_LONG_SLEEP_SEC : int = 30
def __init__ ( self , api : KubernetesApiManager , name : str ) :
self . _highlighter = _HighlighterYaml ( )
self . name = name
self . api = api
@ -230,71 +235,51 @@ class KubernetesNamespace: # pylint: disable=too-many-public-methods
def wait_for_service_deleted(self,
                             name: str,
                             timeout_sec: int = WAIT_SHORT_TIMEOUT_SEC,
                             wait_sec: int = WAIT_SHORT_SLEEP_SEC) -> None:
    """Poll until the named service is gone.

    Repeatedly calls ``self.get_service(name)`` through a constant-interval
    retryer whose result check accepts only ``None``.

    Args:
        name: Name of the service to wait on.
        timeout_sec: Overall polling budget, in seconds.
        wait_sec: Pause between two polls, in seconds.

    Raises:
        retryers.RetryError: Presumably raised by the retryer when the
            timeout elapses first (sibling waiters catch this type) --
            confirm against framework.helpers.retryers.
    """
    retryer = retryers.constant_retryer(
        wait_fixed=datetime.timedelta(seconds=wait_sec),
        timeout=datetime.timedelta(seconds=timeout_sec),
        check_result=lambda service: service is None)
    retryer(self.get_service, name)
def wait_for_service_account_deleted(
        self,
        name: str,
        timeout_sec: int = WAIT_SHORT_TIMEOUT_SEC,
        wait_sec: int = WAIT_SHORT_SLEEP_SEC) -> None:
    """Poll until the named service account is gone.

    Repeatedly calls ``self.get_service_account(name)`` through a
    constant-interval retryer whose result check accepts only ``None``.

    Args:
        name: Name of the service account to wait on.
        timeout_sec: Overall polling budget, in seconds.
        wait_sec: Pause between two polls, in seconds.

    Raises:
        retryers.RetryError: Presumably raised by the retryer when the
            timeout elapses first -- confirm against
            framework.helpers.retryers.
    """
    retryer = retryers.constant_retryer(
        wait_fixed=datetime.timedelta(seconds=wait_sec),
        timeout=datetime.timedelta(seconds=timeout_sec),
        check_result=lambda service_account: service_account is None)
    retryer(self.get_service_account, name)
def wait_for_namespace_deleted(self,
                               timeout_sec: int = WAIT_LONG_TIMEOUT_SEC,
                               wait_sec: int = WAIT_LONG_SLEEP_SEC) -> None:
    """Poll until this namespace is gone.

    Repeatedly calls ``self.get()`` through a constant-interval retryer
    whose result check accepts only ``None``.

    Args:
        timeout_sec: Overall polling budget, in seconds.
        wait_sec: Pause between two polls, in seconds.

    Raises:
        retryers.RetryError: Presumably raised by the retryer when the
            timeout elapses first -- confirm against
            framework.helpers.retryers.
    """
    retryer = retryers.constant_retryer(
        wait_fixed=datetime.timedelta(seconds=wait_sec),
        timeout=datetime.timedelta(seconds=timeout_sec),
        check_result=lambda namespace: namespace is None)
    retryer(self.get)
def wait_for_service_neg(self,
                         name: str,
                         timeout_sec: int = WAIT_SHORT_TIMEOUT_SEC,
                         wait_sec: int = WAIT_SHORT_SLEEP_SEC) -> None:
    """Poll the named service until it passes the NEG annotation check.

    Each poll calls ``self.get_service(name)``; the result is accepted once
    ``self._check_service_neg_annotation`` returns true for it.

    Args:
        name: Name of the service to wait on.
        timeout_sec: Overall polling budget, in seconds.
        wait_sec: Pause between two polls, in seconds.

    Raises:
        retryers.RetryError: Re-raised after the last fetched service
            status has been logged at error level.
    """
    timeout = datetime.timedelta(seconds=timeout_sec)
    retryer = retryers.constant_retryer(
        wait_fixed=datetime.timedelta(seconds=wait_sec),
        timeout=timeout,
        check_result=self._check_service_neg_annotation)
    try:
        retryer(self.get_service, name)
    except retryers.RetryError as e:
        # Dump the last observed status to help diagnose the timeout, then
        # let the original error propagate to the caller.
        logger.error(
            'Timeout %s (h:mm:ss) waiting for service %s to report NEG '
            'status. Last service status:\n%s', timeout, name,
            self._pretty_format_status(e.result()))
        raise
def get_service_neg ( self , service_name : str ,
service_port : int ) - > Tuple [ str , List [ str ] ] :
@ -309,9 +294,10 @@ class KubernetesNamespace: # pylint: disable=too-many-public-methods
def get_deployment(self, name: str) -> V1Deployment:
    """Read the deployment called ``name`` from this namespace.

    Delegates to ``self.api.apps.read_namespaced_deployment`` with this
    object's ``name`` attribute as the namespace.
    """
    return self.api.apps.read_namespaced_deployment(name, self.name)
def delete_deployment ( self ,
name ,
grace_period_seconds = DELETE_GRACE_PERIOD_SEC ) :
def delete_deployment (
self ,
name : str ,
grace_period_seconds : int = DELETE_GRACE_PERIOD_SEC ) - > None :
self . api . apps . delete_namespaced_deployment (
name = name ,
namespace = self . name ,
@ -325,67 +311,82 @@ class KubernetesNamespace: # pylint: disable=too-many-public-methods
def wait_for_deployment_available_replicas(
        self,
        name: str,
        count: int = 1,
        timeout_sec: int = WAIT_MEDIUM_TIMEOUT_SEC,
        wait_sec: int = WAIT_SHORT_SLEEP_SEC) -> None:
    """Poll a deployment until it reports enough available replicas.

    Each poll calls ``self.get_deployment(name)``; the result is accepted
    once ``self._replicas_available(deployment, count)`` returns true.

    Args:
        name: Name of the deployment to wait on.
        count: Number of available replicas required.
        timeout_sec: Overall polling budget, in seconds.
        wait_sec: Pause between two polls, in seconds.

    Raises:
        retryers.RetryError: Re-raised after the last fetched deployment
            status has been logged at error level.
    """
    timeout = datetime.timedelta(seconds=timeout_sec)
    retryer = retryers.constant_retryer(
        wait_fixed=datetime.timedelta(seconds=wait_sec),
        timeout=timeout,
        check_result=lambda depl: self._replicas_available(depl, count))
    try:
        retryer(self.get_deployment, name)
    except retryers.RetryError as e:
        # Dump the last observed status to help diagnose the timeout, then
        # let the original error propagate to the caller.
        logger.error(
            'Timeout %s (h:mm:ss) waiting for deployment %s to report %i '
            'replicas available. Last status:\n%s', timeout, name, count,
            self._pretty_format_status(e.result()))
        raise
def wait_for_deployment_replica_count(
        self,
        deployment: V1Deployment,
        count: int = 1,
        *,
        timeout_sec: int = WAIT_MEDIUM_TIMEOUT_SEC,
        wait_sec: int = WAIT_SHORT_SLEEP_SEC) -> None:
    """Poll until the deployment has exactly ``count`` pods.

    Each poll calls ``self.list_deployment_pods(deployment)``; the result
    is accepted once its length equals ``count``.

    Args:
        deployment: Deployment whose pods are listed.
        count: Exact number of pods required.
        timeout_sec: Overall polling budget, in seconds (keyword-only).
        wait_sec: Pause between two polls, in seconds (keyword-only).

    Raises:
        retryers.RetryError: Re-raised after the last observed pod count
            and pod statuses have been logged at error level.
    """
    timeout = datetime.timedelta(seconds=timeout_sec)
    retryer = retryers.constant_retryer(
        wait_fixed=datetime.timedelta(seconds=wait_sec),
        timeout=timeout,
        check_result=lambda pods: len(pods) == count)
    try:
        retryer(self.list_deployment_pods, deployment)
    except retryers.RetryError as e:
        # Fall back to an empty list so the count and the status dump below
        # still work when no result is available.
        result = e.result(default=[])
        logger.error(
            'Timeout %s (h:mm:ss) waiting for pod count %i, got: %i. '
            'Pod statuses:\n%s', timeout, count, len(result),
            self._pretty_format_statuses(result))
        raise
def wait_for_deployment_deleted(
        self,
        deployment_name: str,
        timeout_sec: int = WAIT_MEDIUM_TIMEOUT_SEC,
        wait_sec: int = WAIT_MEDIUM_SLEEP_SEC) -> None:
    """Poll until the named deployment is gone.

    Repeatedly calls ``self.get_deployment(deployment_name)`` through a
    constant-interval retryer whose result check accepts only ``None``.

    Args:
        deployment_name: Name of the deployment to wait on.
        timeout_sec: Overall polling budget, in seconds.
        wait_sec: Pause between two polls, in seconds.

    Raises:
        retryers.RetryError: Presumably raised by the retryer when the
            timeout elapses first -- confirm against
            framework.helpers.retryers.
    """
    retryer = retryers.constant_retryer(
        wait_fixed=datetime.timedelta(seconds=wait_sec),
        timeout=datetime.timedelta(seconds=timeout_sec),
        check_result=lambda deployment: deployment is None)
    retryer(self.get_deployment, deployment_name)
def list_pods_with_labels(self, labels: dict) -> List[V1Pod]:
    """List the pods in this namespace that match ``labels``.

    The label dict is converted to a selector with
    ``label_dict_to_selector`` and passed as ``label_selector`` to
    ``self.api.core.list_namespaced_pod``.

    Returns:
        The ``items`` attribute of the pod list returned by the API.
    """
    pod_list: V1PodList = self.api.core.list_namespaced_pod(
        self.name, label_selector=label_dict_to_selector(labels))
    return pod_list.items
def get_pod(self, name: str) -> V1Pod:
    """Read the pod called ``name`` from this namespace.

    Delegates to ``self.api.core.read_namespaced_pod`` with this object's
    ``name`` attribute as the namespace.
    """
    return self.api.core.read_namespaced_pod(name, self.name)
def wait_for_pod_started(self,
                         pod_name: str,
                         timeout_sec: int = WAIT_SHORT_TIMEOUT_SEC,
                         wait_sec: int = WAIT_SHORT_SLEEP_SEC) -> None:
    """Poll the named pod until it passes the started check.

    Each poll calls ``self.get_pod(pod_name)``; the result is accepted once
    ``self._pod_started`` returns true for it.

    Args:
        pod_name: Name of the pod to wait on.
        timeout_sec: Overall polling budget, in seconds.
        wait_sec: Pause between two polls, in seconds.

    Raises:
        retryers.RetryError: Re-raised after the last fetched pod status
            has been logged at error level.
    """
    timeout = datetime.timedelta(seconds=timeout_sec)
    retryer = retryers.constant_retryer(
        wait_fixed=datetime.timedelta(seconds=wait_sec),
        timeout=timeout,
        check_result=self._pod_started)
    try:
        retryer(self.get_pod, pod_name)
    except retryers.RetryError as e:
        # Dump the last observed status to help diagnose the timeout, then
        # let the original error propagate to the caller.
        logger.error(
            'Timeout %s (h:mm:ss) waiting for pod %s to start. '
            'Pod status:\n%s', timeout, pod_name,
            self._pretty_format_status(e.result()))
        raise
def port_forward_pod (
self ,
@ -400,12 +401,55 @@ class KubernetesNamespace: # pylint: disable=too-many-public-methods
pf . connect ( )
return pf
def _pretty_format_statuses(self,
                            k8s_objects: List[Optional[object]]) -> str:
    """Format several k8s objects and join the results with newlines.

    Args:
        k8s_objects: Objects to format; each one is passed to
            ``self._pretty_format_status``.

    Returns:
        The per-object strings joined by a single newline; an empty string
        when ``k8s_objects`` is empty.
    """
    return '\n'.join(
        self._pretty_format_status(k8s_object)
        for k8s_object in k8s_objects)
def _pretty_format_status(self, k8s_object: Optional[object]) -> str:
    """Return a k8s object's name followed by its pretty-printed status.

    Args:
        k8s_object: Object to describe, or ``None``.

    Returns:
        ``'No data'`` when ``k8s_object`` is ``None``; otherwise
        ``'<name>:\\n<status>\\n'``, where either part is replaced by a
        "Can't parse ..." placeholder if it cannot be obtained.
    """
    if k8s_object is None:
        return 'No data'

    # Parse the name if present.
    if hasattr(k8s_object, 'metadata') and hasattr(k8s_object.metadata,
                                                   'name'):
        name = k8s_object.metadata.name
    else:
        name = 'Can\'t parse resource name'

    # Pretty-print the status if present.
    if hasattr(k8s_object, 'status'):
        try:
            status = self._pretty_format(k8s_object.status.to_dict())
        except Exception as e:  # pylint: disable=broad-except
            # Catching all exceptions because not printing the status
            # isn't as important as the system under test.
            status = f'Can\'t parse resource status: {e}'
    else:
        status = 'Can\'t parse resource status'

    # Return the name of k8s object, and its pretty-printed status.
    return f'{name}:\n{status}\n'
def _pretty_format(self, data: dict) -> str:
    """Return a string with pretty-printed yaml data from a python dict.

    The dict is dumped with explicit document start and end markers, and
    the resulting text is passed through ``self._highlighter.highlight``.
    """
    yaml_out: str = yaml.dump(data, explicit_start=True, explicit_end=True)
    return self._highlighter.highlight(yaml_out)
@classmethod
def _check_service_neg_annotation(cls,
                                  service: Optional[V1Service]) -> bool:
    """Tell whether ``service`` carries the NEG status annotation.

    Args:
        service: Fetched service, or ``None`` when there is none.

    Returns:
        True only when ``service`` is a ``V1Service`` whose annotations
        contain ``cls.NEG_STATUS_META``.
    """
    if not isinstance(service, V1Service):
        return False
    # The annotations map is optional on object metadata; treat a missing
    # map as empty instead of raising TypeError on the membership test.
    annotations = service.metadata.annotations or {}
    return cls.NEG_STATUS_META in annotations
@classmethod
def _pod_started(cls, pod: V1Pod) -> bool:
    """Tell whether ``pod`` has left the Pending/Unknown phases.

    Returns:
        True only when ``pod`` is a ``V1Pod`` whose ``status.phase`` is
        neither ``'Pending'`` nor ``'Unknown'``.
    """
    return (isinstance(pod, V1Pod) and
            pod.status.phase not in ('Pending', 'Unknown'))
@classmethod
def _replicas_available(cls, deployment: V1Deployment, count: int) -> bool:
    """Tell whether ``deployment`` reports at least ``count`` replicas.

    Returns:
        True only when ``deployment`` is a ``V1Deployment`` whose
        ``status.available_replicas`` is set and is at least ``count``.
    """
    return (isinstance(deployment, V1Deployment) and
            deployment.status.available_replicas is not None and
            deployment.status.available_replicas >= count)