
    \B                        d Z ddlmZ ddlmZ ddlmZ ddlZddlmZ ddl	m
Z
 ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ  ej:                  ej<                  j>                  ej<                  j@                  ej<                  jB                        ejD                   G d dejF                                      Z$y)zModel Garden deploy command.    )absolute_import)division)unicode_literalsN)
exceptions)
operations)client)apis)arg_parsers)base)	constants)endpoint_util)flags)model_garden_utils)region_util)
validation)
propertiesc                   &    e Zd ZdZed        Zd Zy)Deployac  Deploy a model in Model Garden to a Vertex AI endpoint.

  ## EXAMPLES

  To deploy a Model Garden model `google/gemma2/gemma2-9b` under project
  `example` in region
  `us-central1`, run:

    $ gcloud ai model-garden models deploy
    --model=google/gemma2@gemma-2-9b
    --project=example
    --region=us-central1

  To deploy a Hugging Face model `meta-llama/Meta-Llama-3-8B` under project
  `example` in region `us-central1`, run:

    $ gcloud ai model-garden models deploy
    --model=meta-llama/Meta-Llama-3-8B
    --hugging-face-access-token={hf_token}
    --project=example
    --region=us-central1
  c           
      (	   t        j                  ddd      j                  |        t        j                  ddd      j                  |        t        j                  ddd	      j                  |        t        j                  | d
t
        j                         t        j                  ddd      j                  |        t        j                  ddd      j                  |        t        j                  ddt        d      j                  |        t        j                  ddddd      j                  |        t        j                  ddddd      j                  |        t        j                  dt        j                  t        t        t        j                         ddg      d      j                  |        t        j                  d dddd!"      j                  |        t        j                  d#dddd$"      j                  |        t        j                  d%dddd&"      j                  |        t        j                  d'd()      j                  |        | j                  d*d+t        j                         t        j                  d,-       | j                  d.t        j                         d/t        j                  d01       | j                  d2d3t        j                         d45       | j                  d6d7t        j                  t        j                  d8d9      :      t        j                  d;-       | j                  d<d7t        j                  t        j                  d8d9      :      t        j                  d=-       | j                  d>d?)       | j                  d@dA)       | j                  dBt        dC       | j                  dDt        dE       | j                  dFt        j                         dGdHI       | j                  dJt        dK       | j                  dLt        dM       | j                  dNt        j                         dOdPI       | j                  dQt        dR       | j                  dSt        dT       y )UN--modelTa  The model to be deployed. If it is a Model Garden model, it should be in the format of `{publisher_name}/{model_name}@{model_version_name}, e.g. `google/gemma2@gemma-2-2b`. If it is a Hugging Face model, it should be in the convention of Hugging Face models, e.g. `meta-llama/Meta-Llama-3-8B`. If it is a Custom Weights model, it should be in the format of `gs://{gcs_bucket_uri}`, e.g. `gs://-model-garden-public-us/llama3.1/Meta-Llama-3.1-8B-Instruct`.)requiredhelpz--hugging-face-access-tokenFzThe access token from Hugging Face needed to read the model artifacts of gated models. It is only needed when the Hugging Face model to deploy is gated.z--endpoint-display-namez5Display name of the endpoint with the deployed model.zto deploy the model)prompt_funcz--machine-typezThe machine type to deploy the model to. It should be a supported machine type from the deployment configurations of the model. Use `gcloud ai model-garden models list-deployment-config` to check the supported machine types.)r   r   z--accelerator-typezThe accelerator type to serve the model. It should be a supported accelerator type from the verified deployment configurations of the model. Use `gcloud ai model-garden models list-deployment-config` to check the supported accelerator types.z--accelerator-countzSThe accelerator count to serve the model. Accelerator count should be non-negative.)r   typer   z--accept-eulazNWhen set, the user accepts the End User License Agreement (EULA) of the model.
store_true)r   actiondefaultr   z--asynchronouszaIf set to true, the command will terminate immediately and not keep polling the operation status.z--reservation-affinity)reservation-affinity-typekeyvaluesr   )specrequired_keyszA ReservationAffinity can be used to configure a Vertex AI resource (e.g., a DeployedModel) to draw its Compute Engine resources from a Shared Reservation, or exclusively from on-demand capacity.)r   r   z--spotz5If true, schedule the deployment workload on Spot VM.)r   r   r   r   z--use-dedicated-endpointzIf true, the endpoint will be exposed through a dedicated DNS. Your request to the dedicated DNS will be isolated from other users' traffic and will have better performance and reliability.z--enable-fast-tryoutzIf True, model will be deployed using faster deployment path. Useful for quick experiments. Not for production workloads. Only available for most popular models with certain machine types.z--container-image-uriz{      URI of the Model serving container file in the Container Registry
      (e.g. gcr.io/myproject/server:latest).
      )r   z--container-env-varsz	KEY=VALUEz8List of key-value pairs to set as environment variables.)metavarr   r   r   z--container-commandCOMMANDzm  Entrypoint for the container image. If not specified, the container
  image's default entrypoint is run.
  )r   r#   r   r   z--container-argsARGz  Comma-separated arguments passed to the command run by the container
  image. If not specified and no `--command` is provided, the container
  image's default command is used.
  )r#   r   r   z--container-portsPORT   i  )element_typezd  Container ports to receive http requests at. Must be a number between 1 and
  65535, inclusive.
  z--container-grpc-portszd  Container ports to receive grpc requests at. Must be a number between 1 and
  65535, inclusive.
  z--container-predict-routez>HTTP path to send prediction requests to inside the container.z--container-health-routez8HTTP path to send health checks to inside the container.z&--container-deployment-timeout-secondszDeployment timeout in seconds.z!--container-shared-memory-size-mbz`  The amount of the VM memory to reserve as the shared memory for the model in
  megabytes.
    z--container-startup-probe-execSTARTUP_PROBE_EXECz  Exec specifies the action to take. Used by startup probe. An example of this
  argument would be ["cat", "/tmp/healthy"].
    )r   r#   r   z(--container-startup-probe-period-secondszh  How often (in seconds) to perform the startup probe. Default to 10 seconds.
  Minimum value is 1.
    z)--container-startup-probe-timeout-secondszm  Number of seconds after which the startup probe times out. Defaults to 1 second.
  Minimum value is 1.
    z--container-health-probe-execHEALTH_PROBE_EXECz  Exec specifies the action to take. Used by health probe. An example of this
  argument would be ["cat", "/tmp/healthy"].
    z'--container-health-probe-period-secondszg  How often (in seconds) to perform the health probe. Default to 10 seconds.
  Minimum value is 1.
    z(--container-health-probe-timeout-secondszl  Number of seconds after which the health probe times out. Defaults to 1 second.
  Minimum value is 1.
    )r   ArgumentAddToParserr   AddRegionResourceArgr   PromptForOpRegionintr
   ArgDictstrArgListadd_argumentUpdateAction
BoundedInt)parsers    ,lib/surface/ai/model_garden/models/deploy.pyArgszDeploy.ArgsC   s   MML	 k&MM%:	 k&MM!D k&	%;3P3P 	MM,
 	 k&MMQ
 	 k&MM'  k&MM 	 k&MM2 	 k&MM   -0%--/
 77
#  k&MMD k&MM"P
 k&MMM
 k&MM
 k&
  "''G     "''  	   "	  	   k.D.DQ.NO''  	    k.D.DQ.NO''  	 #M   "G   0-  
 +   (  "$	   2   3   '  "#	   1   2      c           
      |   |j                   j                  d      }|st        j                  |       t        j                  |j
                         |j                  j                  j                         }|j                         d   |_        t        j                  }d|j                   v}t        j                  |d      5  |r
t        |j                        t        |j                         cxk(  rt        |j"                        k(  sn t%        j&                  dd      d }|j                  rt)        j*                  ||j                  |j                   |j"                         t-        j.                  t        j0                  t        j2                  |         }|j4                  j7                  |j                  |j4                  j6                  j9                  |j                         |j"                  	      }t        j                  ||j                        5  d
j;                  dt=        t?        j>                               jA                  d      d   dg      }tC        jD                         }	tG        jH                  |      }
|j
                  r|j
                  n|}t)        jJ                  ||||j                   |
|	       d d d        nLtC        jD                         }	|rc|j                   jM                         jA                  d      \  }}	 |	jO                  d| d| d      }d
j;                  ||ddg      }| d| }nr|j                   jM                         jA                  d      \  }}	 |	jO                  d| d|       }d
j;                  ||jA                  d      d   dg      }d| d| }t)        jV                  ||      }t)        j*                  ||jX                  jZ                  j\                  t=        |jX                  jZ                  j^                        |jX                  jZ                  j`                         tb        jd                  jf                  jh                  jk                  d        t        j                  ||j                        5  tC        jD                         }	tG        jH                  |      }
|j
                  r|j
                  n|}t)        jJ                  ||jX                  jZ                  |||
|	       d d d        d d d        y # 1 sw Y   xY w# tP        jR                  $ r$ t%        jT                  d|j                    d      w xY w# tP        jR                  $ r$ t%        jT                  d|j                    d      w xY w# 1 sw Y   xY w# 1 sw Y   y xY w)Nzgs://locationsId@zus-central1)regionz:--machine-type, --accelerator-type and --accelerator-countz Arguments for MachineType, AcceleratorType and AcceleratorCount must either all be provided or all be empty for custom weights model deployment.)machine_typeaccelerator_typeaccelerator_count)machineTypeacceleratorTypeacceleratorCount-zcustom-weights.r   zmg-cli-deploy)version/zpublishers/z/models/T)
model_nameis_hugging_face_modelr   zF is not a supported Hugging Face model for deployment in Model Garden.hfzF is not a supported Model Garden model for deployment in Model Garden.r'   )6model
startswithr   ValidateModelGardenModelArgsValidateDisplayNameendpoint_display_nameCONCEPTSr=   ParseAsDictr   BETA_VERSIONr   AiplatformEndpointOverridesboolr>   r?   r@   c_exceptionsInvalidArgumentExceptionr   CheckAcceleratorQuotar	   GetClientInstanceAI_PLATFORM_API_NAMEAI_PLATFORM_API_VERSIONMESSAGES_MODULE'GoogleCloudAiplatformV1beta1MachineSpecAcceleratorTypeValueValuesEnumjoinr1   timesplit	client_mgModelGardenClientr   OperationsClientr   lowerGetPublisherModelapitools_exceptionsHttpNotFoundErrorUnknownArgumentExceptionGetDeployConfigdedicatedResourcesmachineSpecrA   rB   rC   r   VALUESapi_endpoint_overrides
aiplatformSet)selfargsis_custom_weights_model
region_refrF   is_hf_modelmachine_specr   default_endpoint_name	mg_clientoperation_clientendpoint_namepublisher_namerH   publisher_modelapi_model_argmodel_and_version_namedeploy_configs                     r7   Runz
Deploy.RunG  s`   "jj33G<"--d3""4#=#=>%%++-J##%m4DK$$GTZZ'K		2	2
 
! ""#D))*,D**+, 55J#  

2
2,,#44 $ 6 6	 )),,//8&
  //WW++$44\\{{''  $55 X , 66DKK
 #&(($))+$$S)!,, #

  113)'88I
 ++ (((  
#
#jj!
 
8 //1	'+zz'7'7'9'?'?'D
$.*
'99((8M&* : O #&((z4A#
 ,,Aj\:- 48::3C3C3E3K3KC3P
0.0	'99n-X6L5MNO #&(($**3/2, #
 N+84J3KL  +::/

 	00&99EEQQ 00<<LL ,>>JJ[[	
 	00;;??E 66DKK
  113)'88I
 ++ (((  
#
#..::
G
 
R
 
L %66 77::, 9 9 & %66 77::, / / B
 
G
 
sr   EV2B*T&AV2T2-A	V27U,DV2"A3V&V2&T/	+V227U))V2,7V##V2&V/	+V22V;N)__name__
__module____qualname____doc__staticmethodr8   r    r9   r7   r   r   '   s$    
. A AF_r9   r   )%r   
__future__r   r   r   r`   apitools.base.pyr   rg   googlecloudsdk.api_lib.air   &googlecloudsdk.api_lib.ai.model_gardenr   rb   googlecloudsdk.api_lib.utilr	   googlecloudsdk.callioper
   r   rV   googlecloudsdk.command_lib.air   r   r   r   r   r   googlecloudsdk.corer   ReleaseTracksReleaseTrackALPHABETAGADefaultUniverseOnlyCommandr   r   r9   r7   <module>r      s    # &  '  > 0 F , / ( > 3 7 / < 5 4 * T..33T5F5F5I5I {T\\ { {r9   