
    Y                        d Z ddlmZ ddlmZ ddlmZ ddlZddlmZ ddlmZ	 ddl
mZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ dZddddddddddd
Z d d!d"d#d$d%d&d'd(d)d
Z!d*Z"d+ Z#d, Z$d- Z%	 d9d.Z&	 d9d/Z'd0 Z(d1 Z)d2 Z*d3 Z+d4 Z,d5 Z-d6 Z.d7 Z/d8 Z0y):z-Utilities for the model garden command group.    )absolute_import)division)unicode_literalsN)encoding)
exceptions)
operations)client)metric)
quota_info)endpoints_util)models_util)operations_util)log)
properties)requests)	resources)
console_io?   +CustomModelServingP4GPUsPerProjectPerRegion+CustomModelServingT4GPUsPerProjectPerRegion+CustomModelServingL4GPUsPerProjectPerRegion,CustomModelServingK80GPUsPerProjectPerRegion-CustomModelServingV100GPUsPerProjectPerRegion-CustomModelServingP100GPUsPerProjectPerRegion-CustomModelServingA100GPUsPerProjectPerRegion1CustomModelServingA10080GBGPUsPerProjectPerRegion-CustomModelServingH100GPUsPerProjectPerRegion+CustomModelServingV5ETPUPerProjectPerRegion)
NVIDIA_TESLA_P4NVIDIA_TESLA_T4	NVIDIA_L4NVIDIA_TESLA_K80NVIDIA_TESLA_V100NVIDIA_TESLA_P100NVIDIA_TESLA_A100NVIDIA_A100_80GBNVIDIA_H100_80GBTPU_V5_LITEPOD#custom_model_serving_nvidia_p4_gpus#custom_model_serving_nvidia_t4_gpus#custom_model_serving_nvidia_l4_gpus$custom_model_serving_nvidia_k80_gpus%custom_model_serving_nvidia_v100_gpus%custom_model_serving_nvidia_p100_gpus%custom_model_serving_nvidia_a100_gpus*custom_model_serving_nvidia_a100_80gb_gpus%custom_model_serving_nvidia_h100_gpuscustom_model_serving_tpu_v5ea  metric.type="serviceruntime.googleapis.com/quota/allocation/usage" AND resource.type="consumer_quota" AND metric.label.quota_metric="aiplatform.googleapis.com/{}" AND resource.label.project_id="{}" AND resource.label.location="{}" AND resource.label.service="aiplatform.googleapis.com"c                     t         j                  j                  | |t        j                  j
                  j                  j                  dd      S )z<Parses a Vertex Endpoint ID into a endpoint resource object.)locationsId
projectsIdz'aiplatform.projects.locations.endpoints)params
collection)r   REGISTRYParser   VALUEScoreproject	GetOrFail)endpoint_idlocation_ids     7lib/googlecloudsdk/command_lib/ai/model_garden_utils.py_ParseEndpointrA   Q   sK    				!	!$"))..66@@ ; 
" 
     c                     t        j                  |dddt        |         }|j                  D ]0  }|j                  d   | k(  s|j
                  j                  xs dc S  y)z<Gets the quota limit for the accelerator type in the region.Nzaiplatform.googleapis.comr   )r   GetQuotaInfo!_ACCELERATOR_TYPE_TO_QUOTA_ID_MAPdimensionsInfosapplicableLocationsdetailsvalue)regionr<   accelerator_typeaccelerator_quotaregion_infos        r@   _GetQuotaLimitrN   ]   si     --

!'(89 '66k&&q)V3  &&+!+ 7 
rB   c                    t         j                   j                  t         j                  j                        }|t        j                  d      z
  }|j                  d      j                  dd      }|j                  d      j                  dd      }t        j                         j                  |dt        j                         j                  j                  j                  ||t        j                  t         |   ||             }	 |j"                  d	   j$                  d	   j&                  j(                  }|S # t*        $ r d	}Y |S w xY w)
zTGets the quota usage for the accelerator type in the region using the monitoring AP.   )hoursseconds)timespecz+00:00Z60s)r<   aggregation_alignment_periodaggregation_per_series_alignerinterval_start_timeinterval_end_time
filter_strr   )datetimenowtimezoneutc	timedelta	isoformatreplacer
   MetricClientListTimeSeriesByProjectGetMessagesModule'MonitoringProjectsTimeSeriesListRequest*AggregationPerSeriesAlignerValueValuesEnumALIGN_NEXT_OLDER_TIME_SERIES_FILTERformat%_ACCELERATOR_TYPE_TP_QUOTA_METRIC_MAP
timeSeriespointsrI   
int64Value
IndexError)	rJ   r<   rK   current_time_utctwenty_five_hours_ago_time_utcrfc3339_time"rfc3339_time_twenty_five_hours_agoquota_usage_time_seriescurrent_usages	            r@   _GetQuotaUsageru   m   sZ    &&**8+<+<+@+@A#3h6H6H7 $  "++Y+?GG, (F'O'O (P (GHc % #//1II#(%+%=%=%?%g%g  &S  &S  &d  &d<$$++
/0@
A

 J **1-44Q7==HH  
 
 M	s   0D= =EEc                     | rd| d| j                  dd      d t         S d| d| j                  dd      d t         S )Nhf--._zmg-ra   _MAX_LABEL_VALUE_LENGTHis_hf_modelpublisher_name
model_namemodel_version_names       r@   GetCLIEndpointLabelValuer      sg      *.66sC@ 	    "4!56>>sCH 	  rB   c                     | rd| d| j                  dd      d t         S d| d| d| j                  dd      d t         S )Nrw   rx   ry   rz   zpublishers-z-models-r{   r}   s       r@   GetOneClickEndpointLabelValuer      sq      *.66sC@ 	  
 n%Xj\;M:NO	

 %$		
rB   c                     t        j                         j                  d|  d| d      }|j                  dk7  rt	        j
                  d      t        |j                         d         S )z9Checks if the HF model is gated or not by calling HF API."https://huggingface.co/api/models//z?blobs=true   zfSomething went wrong when we call HuggingFace's API to get the model metadata. Please try again later.gated)r   
GetSessiongetstatus_codecore_exceptionsInternalErrorbooljson)r   r   hf_responses      r@   IsHFModelGatedr      sq    ##%))*>*:!J<{S+ #

'
'	3  
k )	**rB   c                     t        j                         j                  dd| d| ddd|  i      }|j                  dk7  rt	        j
                  d	      y )
NGETr   r   z/auth-checkAuthorizationzBearer )headersr   zaThe Hugging Face access token is not valid or does not have permission to access the gated model.)r   r   requestr   r   Error)hf_tokenr   r   r   s       r@   VerifyHFTokenPermissionr      sr    ##%--*>*:!J<{S'( 45 . +
 #


	&  	rB   c                    	 |j                   j                  j                  }d}| j
                  s| j                  s| j                  r|D ]  }| j
                  r-|j                  j                  j                  | j
                  k7  s| j                  rDt        |j                  j                  j                        | j                  j                         k7  s/| j                  r$|j                  j                  | j                  k7  r|} n |st        j                  d      t         j"                  j%                  d       n$|d   }t         j"                  j%                  d       |j                  j                  }|j                  j                  }|j                  r,t         j"                  j%                  d|j                          |j                  r,t         j"                  j%                  d|j                          |j&                  r,t         j"                  j%                  d	|j&                          |r"t         j"                  j%                  d
|        |S # t        $ r t        j                  d      w xY w)zGReturns a best suited deployment configuration for the publisher model.zModel does not support deployment, please use a deploy-able model instead. You can use the `gcloud ai model-garden models list` command to find out which ones are currently supported by the `deploy` command.NzThe machine type, accelerator type and/or container image URI is not supported by the model. You can use `gcloud alpha/beta ai model-garden models list-deployment-config` command to find the supported configurationsz,Using the selected deployment configuration:r   z+Using the default deployment configuration:z Machine type: z Accelerator type: z Accelerator count: z Container image URI: )supportedActionsmultiDeployVertexAttributeErrorr   r   machine_typerK   container_image_uridedicatedResourcesmachineSpecmachineTypestracceleratorTypeuppercontainerSpecimageUrir   statusPrintacceleratorCount)argspublisher_modelmulti_deploydeploy_configdeploymachine_specr   s          r@   GetDeployConfigr      s&   
((::LL  -	$//43K3K ++77CC""# ##f//;;KKL&&,,./ &&&&//43K3KK 	m' * !!&  JJCD !OMJJBC11==,%33<<JJ|'?'?&@AB!!JJ*<+G+G*HIJ""JJ+L,I,I+JKLJJ-.A-BCD	m 
 


	 s    I   I?c                 B   |dk(  rd}d}n	|dk(  rd}d}t         j                  j                  j                  j	                         }t        | j                  ||      }||k  r3t        j                  dt        |    d| j                   d| d	| d
	      t        | j                  ||      }||z   |kD  r6t        j                  dt        |    d| j                   d| d| d	| d
      t        j                  j                  d| d| j                   d| d| d	       y)z8Checks the accelerator quota for the project and region.zct5lp-hightpu-1tr(      zct5lp-hightpu-4t   z+The project does not have enough quota for z in z) to deploy the model. The quota limit is z and you are requesting for z. Please use a different region or request more quota by following https://cloud.google.com/vertex-ai/docs/quotas#requesting_additional_quota.z+ to deploy the model. The current usage is z out of zNThe project has enough quota. The current usage of quota for accelerator type z in region z is ry   N)r   r:   r;   r<   r=   rN   rJ   r   r   rj   ru   r   r   r   )r   r   rK   accelerator_countr<   quota_limitrt   s          r@   CheckAcceleratorQuotar     s    '''))'""**446't{{G5EF+$$


	12BCD EKK= 11< >,- .W		W  !g7GH-&&4


	12BCD EKK= 33@/ B= ,- .W	W	 	 **+,K} E

Q0rB   c           
      T   |j                  || |j                  j                  j                  |j                  j                  j                  j	                  d|      g            }t        j                  ||t        j                  |j                              }|t        j                  d      t        j                  |      }d|vrt        j                  d      t        j                  j!                  dj#                  |d                |d   j%                  d      d	   S )
z)Creates a Vertex endpoint for deployment.zmg-cli-deploy)keyrI   )additionalProperties)labelszEInternal error: Failed to create a Vertex endpoint. Please try again.namezOCreated Vertex AI endpoint: {}.
Starting to upload the model to Model Registry.r   )
CreateBetamessages$GoogleCloudAiplatformV1beta1EndpointLabelsValueAdditionalPropertyr   WaitForOpMayber   ParseOperationr   r   r   r   MessageToPyValuer   r   r   ri   split)endpoint_namelabel_value
region_refoperation_clientendpoints_clientcreate_endpoint_opcreate_endpoint_response_msgresponses           r@   CreateEndpointr   8  s2    (22&&KKWW''LLXXkk%[ l   X  3 
 "1!?!?##$6$;$;<"
 ")

'
'O  &&'CD(8

'
'O  ** x 	 
&				$R	((rB   c                 j   d\  }}}	| j                   j                  rM| j                   j                  D 
ci c]  }
|
j                  |
j                   }}
|rd|v r|j                  |d<   | j                   j
                  rt        | j                   j
                        }| j                   j                  rt        | j                   j                        }	t        j                         }|j                  |j                  j                  j                         |dd| j                  | j                   j                  |	||| j                   j                   d   j"                  gd| j                   j$                  | j                   j&                  |j(                  j+                  |j(                  j-                  dj/                  |rd|z   n||                  	      }t1        j2                  t5        j6                         |t9        j:                  |j                        
      }|t=        j>                  d      tA        jB                  |      }d|vrt=        j>                  d      tD        jF                  jI                  dj/                  |d                |d   jK                  d      d   S c c}
w )z1Uploads the Model Garden model to Model Registry.)NNNHUGGING_FACE_HUB_TOKENNr   zpublishers/{}/models/{}rw   )publicModelName)modelGardenSource)base_model_source)operations_clientopop_refz`Internal error: Failed to upload a Model Garden model to Model Registry. Please try again later.modelzEUploaded model to Model Registry at {}.
Starting to deploy the model.r   r   )&r   envr   rI   hugging_face_access_tokenr   listcommandclient_modelsModelsClientUploadV1Beta1CONCEPTSrJ   r9   artifactUrir   portscontainerPortpredictRoutehealthRouter   0GoogleCloudAiplatformV1beta1ModelBaseModelSource-GoogleCloudAiplatformV1beta1ModelGardenSourceri   r   r   r   OperationsClientr   ParseModelOperationr   r   r   r   r   r   r   r   )r   r   requires_hf_tokenr~   uploaded_model_namer   publisher_model_namecontainer_env_varscontainer_argscontainer_commandsvarmodels_clientupload_model_opupload_model_response_msgupload_model_responses                  r@   UploadModelr   b  s    <L8n&8  $$'4'B'B'F'F'F#))'F   59KK

(
( 12   %%-55::;N  ((m99AAB,,.-!//
mm  "

!!**""((+99:
!!..!!--%..__)22``7>>,7%.(^& a  `  0 /2 .<<"335	,,_-A-AB
 &

'
'	-  #334MN))

'
'	-  **$W-.	 
w	'	-	-c	22	66ys   J0c                    |j                   j                  j                  }|j                   j                  j                  }d}	||7i }	|,t	        |      j                         j                  dd      |	d<   |||	d<   |j                  t        || j                        || j                  ||j                   j                  j                  |	dd      }
t        j                  ||
t        j                  |
j                        d       |
j                  j!                  d	      d
   }t#        d| d| d| j                   d| d| j                   d       y)z8Deploys the Model Registry model to the Vertex endpoint.Nrz   rx   typecountT)r   accelerator_dictenable_access_loggingenable_container_logging)asynchronousr   r   Deploying the model to the endpoint. To check the deployment status, you can try one of the following methods:
1) Look for endpoint `g` at the [Vertex AI] -> [Online prediction] tab in Cloud Console
2) Use `gcloud ai operations describe 
 --region=zc` to find the status of the deployment long-running operation
3) Use `gcloud ai endpoints describe z+` command to check the endpoint's metadata.)r   r   r   r   r   lowerra   DeployModelBetarA   rJ   r   r   r   r   r   r   r   print)r   r   r>   r   model_idr   r   rK   r   r   deploy_model_opdeploy_op_ids               r@   DeployModelr    so    &&22BB  &&22CC  !%6%B#!$%5!6!<!<!>!F!FsC!Pv$"3w$44[$++.
kk 33??KK' # 5 	/   ##O$8$89	 !%%++C04,!? #.
4;;- 8(MDKK= A++	rB   c           	      4   	 |%|j                   }|j                  }|j                  }nd}d}d} |j                  d8i dt        j
                  j                  j                  j                         d| j                  d|d| j                  d|d|d|d	|d
| j                  d| j                  d| j                  d| j                  d| j                  d| j                   d| j"                  d| j$                  d| j&                  d| j(                  d| j*                  d| j,                  d| j.                  d| j0                  d| j2                  d| j4                  d| j6                  d| j8                  d| j:                  d| j<                  d| j>                  }	|	j\                  jU                  d'      d1   }tL        jN                  jQ                  d2| d3| d4| j                   d5       t_        j`                  ||	tc        |	j\                        | jd                  d67       y# t@        jB                  $ rN}
|
jD                  dk(  rbd |
jF                  v rT| j                  H| j                  s'tI        jJ                  d!      | _        | j                  s't        | |||||       Y d}
~
y|
jD                  d"k(  rd#|
jF                  v rtL        jN                  jQ                  d$       | jR                  jU                  d%      d&   jU                  d'      \  }}	 tI        jV                  d(| d)| d*d+d,d-d.d-/      | _
        n,# tH        jX                  $ r t[        jX                  d0      w xY wt        | |||||       Y d}
~
y|
d}
~
ww xY w)9z1Deploys the publisher model to a Vertex endpoint.Nr<   locationr   accept_eularK   r   r   endpoint_display_namer   spotreservation_affinityuse_dedicated_endpointenable_fast_tryoutr   container_commandr   r   container_portscontainer_grpc_portscontainer_predict_routecontainer_health_route$container_deployment_timeout_secondscontainer_shared_memory_size_mbcontainer_startup_probe_exec&container_startup_probe_period_seconds'container_startup_probe_timeout_secondscontainer_health_probe_exec%container_health_probe_period_seconds&container_health_probe_timeout_secondsi  z)provide a valid Hugging Face access tokenz2Please enter your Hugging Face read access token: i  EULAzIThe End User License Agreement (EULA) of the model has not been accepted.@r   r   zThe model can be deployed only if the EULA of the model has been accepted. You can view it at https://console.cloud.google.com/vertex-ai/publishers/z/model-garden/z):zDo you want to accept the EULA?FTzEULA is not accepted.)messageprompt_stringdefaultcancel_on_nocancel_stringthrow_if_unattendedz6Please accept the EULA using the `--accept-eula` flag.r   r   r   r   z>` to find the status of the deployment long-running operation
i6 )r   max_wait_ms )3r   r   r   Deployr   r:   r;   r<   r=   rJ   r
  r   r  r  r  r  r   r  r   r   r  r  r  r  r  r  r  r  r  r  r  r  apitools_exceptions	HttpErrorr   contentr   PromptPasswordr   r   r   r   r   PromptContinuer   r   r   r   r   r   r   )r   r   r   r   r   	mg_clientr   rK   r   	deploy_ope	publisherr  r  s                 r@   r'  r'    s   ^!--l%55&77l 	   !!&&..88:  $$	
 * , " , #'"@"@ YY "66  $::  22 !44 00  **!"  22#$ ,,%& "66'( !% < <)*  $::+, .2-V-V-. )-(L(L/0 &*%F%F12 04/Z/Z34 150\0\56 %)$D$D78 /3.X.X9: 04/Z/Z;In %%c*2.,**!? #.
4;;- 84	4   Y^^$$$C 
	&	& 6 	
7199D**2..)3)B)B@*
& .. 





 	
#	&AII"5	jj8 !JJ,,S1!4::3?i
%44J KTTbckbllnp
 <1 $
  
##D
 	

 





 gm6sD   F*H5 5N	AN%N:A,N''MN)M88NNNc                 D    t         j                  j                  | d      S )zParse operation resource to the operation reference object.

  Args:
    operation_name: The operation resource to wait on

  Returns:
    The operation reference object
  z(aiplatform.projects.locations.operations)r7   )r   r8   ParseRelativeName)operation_names    r@   r   r   Y  s(     
			-	-; 
. 
 rB   ) r4  )1__doc__
__future__r   r   r   r[   apitools.base.pyr   r   r(  googlecloudsdk.api_lib.air    googlecloudsdk.api_lib.ai.modelsr	   r   !googlecloudsdk.api_lib.monitoringr
   googlecloudsdk.api_lib.quotasr   googlecloudsdk.command_lib.air   r   r   googlecloudsdk.corer   r   r   r   r   googlecloudsdk.core.consoler   r|   rE   rj   rh   rA   rN   ru   r   r   r   r   r   r   r   r   r  r'  r   r&  rB   r@   <module>r?     s   4 &  '  % > 0 D 4 4 8 5 9 = # * ( ) 2   ED>FHHHKGC% ! =<6>@@@D?4) %: 	 #N DF
 DF$
+	<~)X')TH7V3lslrB   