
    I^                        d Z ddlmZ ddlmZ ddlmZ dZ G d dej                        Z
 G d d	ej                        Z G d
 dej                        Z G d dej                        Z G d dej                        Z G d dej                        Z G d dej                        Z G d dej                        Z G d dej                        Z G d dej                        Z G d dej                        Z G d dej                        Z G d dej                        Z G d  d!ej                        Z G d" d#ej                        Z G d$ d%ej                        Z G d& d'ej                        Z G d( d)ej                        Z G d* d+ej                        Z G d, d-ej                        Z G d. d/ej                        Z G d0 d1ej                        Z G d2 d3ej                        Z  G d4 d5ej                        Z! G d6 d7ej                        Z" ejF                  e d8d9        ejH                  e jJ                  d:d;        ejH                  e jJ                  d<d=       y>)?zNGenerated message classes for gkerecommender version v1.

GKE Recommender API
    )absolute_import)messages)encodinggkerecommenderc                       e Zd ZdZ ej
                  dej                  j                        Z ej
                  d      Z	y)Amountak  Represents an amount of money in a specific currency.

  Fields:
    nanos: Output only. Number of nano (10^-9) units of the amount. The value
      must be between -999,999,999 and +999,999,999 inclusive. If `units` is
      positive, `nanos` must be positive or zero. If `units` is zero, `nanos`
      can be positive, zero, or negative. If `units` is negative, `nanos` must
      be negative or zero. For example $-1.75 is represented as `units`=-1 and
      `nanos`=-750,000,000.
    units: Output only. The whole units of the amount. For example if
      `currencyCode` is `"USD"`, then 1 unit is one US dollar.
     variant   N)
__name__
__module____qualname____doc__	_messagesIntegerFieldVariantINT32nanosunits     Ylib/googlecloudsdk/generated_clients/apis/gkerecommender/v1/gkerecommender_v1_messages.pyr   r      s;     !)
 
 I,=,=,C,C
D%
 )
 
 
#%r   r   c                       e Zd ZdZ ej
                  dd      Z ej
                  dd      Z ej                  dej                  j                        Z ej                  d      Zy)	Costa  Cost for running a model deployment on a given instance type. Currently,
  only USD currency code is supported.

  Fields:
    costPerMillionInputTokens: Optional. The cost per million input tokens.
      $/input token = ($/output token) / output-to-input-cost-ratio.
    costPerMillionOutputTokens: Optional. The cost per million output tokens,
      calculated as: $/output token = GPU $/s / (1/output-to-input-cost-ratio
      * input tokens/s + output tokens/s)
    outputInputCostRatio: Optional. The output-to-input cost ratio. This
      determines how the total GPU cost is split between input and output
      tokens. If not provided, `4.0` is used, assuming a 4:1 output:input cost
      ratio.
    pricingModel: Optional. The pricing model used to calculate the cost. Can
      be one of: `3-years-cud`, `1-year-cud`, `on-demand`, `spot`. If not
      provided, `spot` will be used.
  r   r	   r      r
      N)r   r   r   r   r   MessageFieldcostPerMillionInputTokenscostPerMillionOutputTokens
FloatFieldr   FLOAToutputInputCostRatioStringFieldpricingModelr   r   r   r   r   "   sf    $ 5i44XqA5y55hB---a9J9J9P9PQ&&&q),r   r   c                   ~    e Zd ZdZ ej
                  d      Z ej                  dd      Z ej
                  d      Z	y)FetchBenchmarkingDataRequesta  Request message for GkeInferenceQuickstart.FetchBenchmarkingData.

  Fields:
    instanceType: Optional. The instance type to filter benchmarking data.
      Instance types are in the format `a2-highgpu-1g`. If not provided, all
      instance types for the given profile's `model_server_info` will be
      returned. Use GkeInferenceQuickstart.FetchProfiles to find available
      instance types.
    modelServerInfo: Required. The model server configuration to get
      benchmarking data for. Use GkeInferenceQuickstart.FetchProfiles to find
      valid configurations.
    pricingModel: Optional. The pricing model to use for the benchmarking
      data. Defaults to `spot`.
  r	   ModelServerInfor   r   N)
r   r   r   r   r   r$   instanceTyper   modelServerInfor%   r   r   r   r'   r'   ;   sB     '&&q),*I**+<a@/&&&q),r   r'   c                   :    e Zd ZdZ ej
                  ddd      Zy)FetchBenchmarkingDataResponsezResponse message for GkeInferenceQuickstart.FetchBenchmarkingData.

  Fields:
    profile: Output only. List of profiles containing their respective
      benchmarking data.
  Profiler	   TrepeatedN)r   r   r   r   r   r   profiler   r   r   r,   r,   P   s     #I""9a$?'r   r,   c                   \    e Zd ZdZ ej
                  dd      Z ej
                  d      Zy) FetchModelServerVersionsResponsea  Response message for GkeInferenceQuickstart.FetchModelServerVersions.

  Fields:
    modelServerVersions: Output only. A list of available model server
      versions.
    nextPageToken: Output only. A token which may be sent as page_token in a
      subsequent `FetchModelServerVersionsResponse` call to retrieve the next
      page of results. If this field is omitted or empty, then there are no
      more results to return.
  r	   Tr.   r   N)r   r   r   r   r   r$   modelServerVersionsnextPageTokenr   r   r   r2   r2   [   s0    	 .	--a$?')''*-r   r2   c                   \    e Zd ZdZ ej
                  dd      Z ej
                  d      Zy)FetchModelServersResponsea  Response message for GkeInferenceQuickstart.FetchModelServers.

  Fields:
    modelServers: Output only. List of available model servers. Open-source
      model servers use simplified, lowercase names (e.g., `vllm`).
    nextPageToken: Output only. A token which may be sent as page_token in a
      subsequent `FetchModelServersResponse` call to retrieve the next page of
      results. If this field is omitted or empty, then there are no more
      results to return.
  r	   Tr.   r   N)r   r   r   r   r   r$   modelServersr4   r   r   r   r6   r6   k   s/    	 '&&q48,')''*-r   r6   c                   \    e Zd ZdZ ej
                  dd      Z ej
                  d      Zy)FetchModelsResponsea  Response message for GkeInferenceQuickstart.FetchModels.

  Fields:
    models: Output only. List of available models. Open-source models follow
      the Huggingface Hub `owner/model_name` format.
    nextPageToken: Output only. A token which may be sent as page_token in a
      subsequent `FetchModelsResponse` call to retrieve the next page of
      results. If this field is omitted or empty, then there are no more
      results to return.
  r	   Tr.   r   N)r   r   r   r   r   r$   modelsr4   r   r   r   r9   r9   {   s/    	 !9  T2&')''*-r   r9   c                      e Zd ZdZ ej
                  d      Z ej
                  d      Z ej
                  d      Z ej                  dej                  j                        Z ej
                  d      Z ej                  dd	      Zy
)FetchProfilesRequesta  Request message for GkeInferenceQuickstart.FetchProfiles.

  Fields:
    model: Optional. The model to filter profiles by. Open-source models
      follow the Huggingface Hub `owner/model_name` format. If not provided,
      all models are returned. Use GkeInferenceQuickstart.FetchModels to find
      available models.
    modelServer: Optional. The model server to filter profiles by. If not
      provided, all model servers are returned. Use
      GkeInferenceQuickstart.FetchModelServers to find available model servers
      for a given model.
    modelServerVersion: Optional. The model server version to filter profiles
      by. If not provided, all model server versions are returned. Use
      GkeInferenceQuickstart.FetchModelServerVersions to find available
      versions for a given model and server.
    pageSize: Optional. The target number of results to return in a single
      response. If not specified, a default value will be chosen by the
      service. Note that the response may include a partial list and a caller
      should only rely on the response's next_page_token to determine if there
      are more instances left to be queried.
    pageToken: Optional. The value of next_page_token received from a previous
      `FetchProfilesRequest` call. Provide this to retrieve the subsequent
      page in a multi-page list of results. When paginating, all other
      parameters provided to `FetchProfilesRequest` must match the call that
      provided the page token.
    performanceRequirements: Optional. The performance requirements to filter
      profiles. Profiles that do not meet these requirements are filtered out.
      If not provided, all profiles are returned.
  r	   r   r   r   r
      PerformanceRequirements   N)r   r   r   r   r   r$   modelmodelServermodelServerVersionr   r   r   pageSize	pageTokenr   performanceRequirementsr   r   r   r<   r<      s    <  )


"%%	%%a(+,y,,Q/#Y##Ay/@/@/F/FG(#i##A&)2I223LaPr   r<   c                       e Zd ZdZ ej
                  d      Z ej
                  d      Z ej                  dd      Z	 ej                  ddd	      Z
y
)FetchProfilesResponsea  Response message for GkeInferenceQuickstart.FetchProfiles.

  Fields:
    comments: Output only. Additional comments related to the response.
    nextPageToken: Output only. A token which may be sent as page_token in a
      subsequent `FetchProfilesResponse` call to retrieve the next page of
      results. If this field is omitted or empty, then there are no more
      results to return.
    performanceRange: Output only. The combined range of performance values
      observed across all profiles in this response.
    profile: Output only. List of profiles that match the given model server
      info and performance requirements (if provided).
  r	   r   PerformanceRanger   r-   r   Tr.   N)r   r   r   r   r   r$   commentsr4   r   performanceRanger0   r   r   r   rG   rG      sY     #Y""1%(')''*-+Y++,>B"I""9a$?'r   rG   c                       e Zd ZdZ ej
                  d      Z ej
                  d      Z ej                  dd      Z	 ej                  dd      Z
 ej                  dd	      Zy
) GenerateOptimizedManifestRequestaf  Request message for GkeInferenceQuickstart.GenerateOptimizedManifest.

  Fields:
    acceleratorType: Required. The accelerator type. Use
      GkeInferenceQuickstart.FetchProfiles to find valid accelerators for a
      given `model_server_info`.
    kubernetesNamespace: Optional. The kubernetes namespace to deploy the
      manifests in.
    modelServerInfo: Required. The model server configuration to generate the
      manifest for. Use GkeInferenceQuickstart.FetchProfiles to find valid
      configurations.
    performanceRequirements: Optional. The performance requirements to use for
      generating Horizontal Pod Autoscaler (HPA) resources. If provided, the
      manifest includes HPA resources to adjust the model server replica count
      to maintain the specified targets (e.g., NTPOT, TTFT) at a P50 latency.
      Cost targets are not currently supported for HPA generation. If the
      specified targets are not achievable, the HPA manifest will not be
      generated.
    storageConfig: Optional. The storage configuration for the model. If not
      provided, the model is loaded from Huggingface.
  r	   r   r(   r   r>   r   StorageConfigr=   N)r   r   r   r   r   r$   acceleratorTypekubernetesNamespacer   r*   rE   storageConfigr   r   r   rL   rL      sm    , *I))!,/-	--a0*I**+<a@/2I223LaP()((!<-r   rL   c                       e Zd ZdZ ej
                  dd      Z ej                  ddd      Z ej
                  d      Z	y)	!GenerateOptimizedManifestResponseaR  Response message for GkeInferenceQuickstart.GenerateOptimizedManifest.

  Fields:
    comments: Output only. Comments related to deploying the generated
      manifests.
    kubernetesManifests: Output only. A list of generated Kubernetes
      manifests.
    manifestVersion: Output only. Additional information about the versioned
      dependencies used to generate the manifests. See [Run best practice
      inference with GKE Inference Quickstart
      recipes](https://cloud.google.com/kubernetes-engine/docs/how-to/machine-
      learning/inference/inference-quickstart) for details.
  r	   Tr.   KubernetesManifestr   r   N)
r   r   r   r   r   r$   rI   r   kubernetesManifestsmanifestVersionr   r   r   rR   rR      sH     #Y""1t4(.	../CQQUV)I))!,/r   rR   c                       e Zd ZdZ ej
                  d      Z ej
                  d      Z ej                  dej                  j                        Z ej
                  d      Zy)-GkerecommenderModelServerVersionsFetchRequesta  A GkerecommenderModelServerVersionsFetchRequest object.

  Fields:
    model: Required. The model for which to list model server versions. Open-
      source models follow the Huggingface Hub `owner/model_name` format. Use
      GkeInferenceQuickstart.FetchModels to find available models.
    modelServer: Required. The model server for which to list versions. Open-
      source model servers use simplified, lowercase names (e.g., `vllm`). Use
      GkeInferenceQuickstart.FetchModelServers to find available model
      servers.
    pageSize: Optional. The target number of results to return in a single
      response. If not specified, a default value will be chosen by the
      service. Note that the response may include a partial list and a caller
      should only rely on the response's next_page_token to determine if there
      are more instances left to be queried.
    pageToken: Optional. The value of next_page_token received from a previous
      `FetchModelServerVersionsRequest` call. Provide this to retrieve the
      subsequent page in a multi-page list of results. When paginating, all
      other parameters provided to `FetchModelServerVersionsRequest` must
      match the call that provided the page token.
  r	   r   r   r
   r   N)r   r   r   r   r   r$   r@   rA   r   r   r   rC   rD   r   r   r   rW   rW      s_    ,  )


"%%	%%a(+#Y##Ay/@/@/F/FG(#i##A&)r   rW   c                       e Zd ZdZ ej
                  d      Z ej                  dej                  j                        Z
 ej
                  d      Zy)&GkerecommenderModelServersFetchRequesta  A GkerecommenderModelServersFetchRequest object.

  Fields:
    model: Required. The model for which to list model servers. Open-source
      models follow the Huggingface Hub `owner/model_name` format. Use
      GkeInferenceQuickstart.FetchModels to find available models.
    pageSize: Optional. The target number of results to return in a single
      response. If not specified, a default value will be chosen by the
      service. Note that the response may include a partial list and a caller
      should only rely on the response's next_page_token to determine if there
      are more instances left to be queried.
    pageToken: Optional. The value of next_page_token received from a previous
      `FetchModelServersRequest` call. Provide this to retrieve the subsequent
      page in a multi-page list of results. When paginating, all other
      parameters provided to `FetchModelServersRequest` must match the call
      that provided the page token.
  r	   r   r
   r   N)r   r   r   r   r   r$   r@   r   r   r   rC   rD   r   r   r   rY   rY     sM    $  )


"%#Y##Ay/@/@/F/FG(#i##A&)r   rY   c                       e Zd ZdZ ej
                  dej                  j                        Z ej                  d      Z
y) GkerecommenderModelsFetchRequesta  A GkerecommenderModelsFetchRequest object.

  Fields:
    pageSize: Optional. The target number of results to return in a single
      response. If not specified, a default value will be chosen by the
      service. Note that the response may include a partial list and a caller
      should only rely on the response's next_page_token to determine if there
      are more instances left to be queried.
    pageToken: Optional. The value of next_page_token received from a previous
      `FetchModelsRequest` call. Provide this to retrieve the subsequent page
      in a multi-page list of results. When paginating, all other parameters
      provided to `FetchModelsRequest` must match the call that provided the
      page token.
  r	   r
   r   N)r   r   r   r   r   r   r   r   rC   r$   rD   r   r   r   r[   r[   .  s;     $Y##Ay/@/@/F/FG(#i##A&)r   r[   c                   |    e Zd ZdZ ej
                  d      Z ej
                  d      Z ej
                  d      Zy)rS   zA Kubernetes manifest.

  Fields:
    apiVersion: Output only. Kubernetes API version.
    content: Output only. YAML content.
    kind: Output only. Kubernetes resource kind.
  r	   r   r   N)	r   r   r   r   r   r$   
apiVersioncontentkindr   r   r   rS   rS   B  s?     %y$$Q'*!I!!!$'			q	!$r   rS   c                       e Zd ZdZ ej
                  dej                  j                        Z ej
                  dej                  j                        Z	y)MillisecondRangezRepresents a range of latency values in milliseconds.

  Fields:
    max: Output only. The maximum value of the range.
    min: Output only. The minimum value of the range.
  r	   r
   r   N
r   r   r   r   r   r   r   r   maxminr   r   r   ra   ra   P  I     		q)*;*;*A*AB#	q)*;*;*A*AB#r   ra   c                   |    e Zd ZdZ ej
                  d      Z ej
                  d      Z ej
                  d      Zy)r(   a  Model server information gives. Valid model server info combinations can
  be found using GkeInferenceQuickstart.FetchProfiles.

  Fields:
    model: Required. The model. Open-source models follow the Huggingface Hub
      `owner/model_name` format. Use GkeInferenceQuickstart.FetchModels to
      find available models.
    modelServer: Required. The model server. Open-source model servers use
      simplified, lowercase names (e.g., `vllm`). Use
      GkeInferenceQuickstart.FetchModelServers to find available servers.
    modelServerVersion: Optional. The model server version. Use
      GkeInferenceQuickstart.FetchModelServerVersions to find available
      versions. If not provided, the latest available version is used.
  r	   r   r   N)	r   r   r   r   r   r$   r@   rA   rB   r   r   r   r(   r(   \  s@      )


"%%	%%a(+,y,,Q/r   r(   c                       e Zd ZdZ ej
                  dd      Z ej
                  dd      Z ej
                  dd      Zy)rH   a  Performance range for a model deployment.

  Fields:
    ntpotRange: Output only. The range of NTPOT (Normalized Time Per Output
      Token) in milliseconds. NTPOT is the request latency normalized by the
      number of output tokens, measured as request_latency /
      total_output_tokens.
    throughputOutputRange: Output only. The range of throughput in output
      tokens per second. This is measured as
      total_output_tokens_generated_by_server / elapsed_time_in_seconds.
    ttftRange: Output only. The range of TTFT (Time To First Token) in
      milliseconds. TTFT is the time it takes to generate the first token for
      a request.
  ra   r	   TokensPerSecondRanger   r   N)	r   r   r   r   r   r   
ntpotRangethroughputOutputRange	ttftRanger   r   r   rH   rH   q  sI     &y%%&8!<*0)001GK$i$$%7;)r   rH   c                       e Zd ZdZ ej
                  dd      Z ej                  dej                  j                        Z
 ej                  dej                  j                        Zy)r>   a  Performance requirements for a profile and or model deployment.

  Fields:
    targetCost: Optional. The target cost for running a profile's model
      server. If not provided, this requirement will not be enforced.
    targetNtpotMilliseconds: Optional. The target Normalized Time Per Output
      Token (NTPOT) in milliseconds. NTPOT is calculated as `request_latency /
      total_output_tokens`. If not provided, this target will not be enforced.
    targetTtftMilliseconds: Optional. The target Time To First Token (TTFT) in
      milliseconds. TTFT is the time it takes to generate the first token for
      a request. If not provided, this target will not be enforced.
  r   r	   r   r
   r   N)r   r   r   r   r   r   
targetCostr   r   r   targetNtpotMillisecondstargetTtftMillisecondsr   r   r   r>   r>     s_     &y%%fa0*2I221i>O>O>U>UV1911!Y=N=N=T=TUr   r>   c                   z   e Zd ZdZ ej
                  ddd      Z ej                  dej                  j                        Z
 ej                  dej                  j                        Z ej                  d	ej                  j                        Z ej                  d
ej                  j                        Zy)PerformanceStatsa  Performance statistics for a model deployment.

  Fields:
    cost: Output only. The cost of running the model deployment.
    ntpotMilliseconds: Output only. The Normalized Time Per Output Token
      (NTPOT) in milliseconds. This is the request latency normalized by the
      number of output tokens, measured as request_latency /
      total_output_tokens.
    outputTokensPerSecond: Output only. The number of output tokens per
      second. This is the throughput measured as
      total_output_tokens_generated_by_server / elapsed_time_in_seconds.
    queriesPerSecond: Output only. The number of queries per second. Note:
      This metric can vary widely based on context length and may not be a
      reliable measure of LLM throughput.
    ttftMilliseconds: Output only. The Time To First Token (TTFT) in
      milliseconds. This is the time it takes to generate the first token for
      a request.
  r   r	   Tr.   r   r
   r   r   r=   N)r   r   r   r   r   r   costr   r   r   ntpotMillisecondsoutputTokensPerSecondr!   r"   queriesPerSecondttftMillisecondsr   r   r   rq   rq     s    & 
 		D	9$,i,,Q	8I8I8O8OP0)00I<M<M<S<ST)Y))!Y5F5F5L5LM+Y++Ay7H7H7N7NOr   rq   c                       e Zd ZdZ ej
                  d      Z ej
                  d      Z ej                  dd      Z	 ej                  ddd	      Z
 ej                  d
d      Z ej
                  d      Zy)r-   av  A profile containing information about a model deployment.

  Fields:
    acceleratorType: Output only. The accelerator type. Expected format:
      `nvidia-h100-80gb`.
    instanceType: Output only. The instance type. Expected format:
      `a2-highgpu-1g`.
    modelServerInfo: Output only. The model server configuration. Use
      GkeInferenceQuickstart.FetchProfiles to find valid configurations.
    performanceStats: Output only. The performance statistics for this
      profile.
    resourcesUsed: Output only. The resources used by the model deployment.
    tpuTopology: Output only. The TPU topology (if applicable).
  r	   r   r(   r   rq   r   Tr.   ResourcesUsedr=   r?   N)r   r   r   r   r   r$   rN   r)   r   r*   performanceStatsresourcesUsedtpuTopologyr   r   r   r-   r-     s     *I))!,/&&&q),*I**+<a@/+Y++,>DQ()((!<-%	%%a(+r   r-   c                   `    e Zd ZdZ ej
                  dej                  j                        Zy)rx   zResources used by a model deployment.

  Fields:
    acceleratorCount: Output only. The number of accelerators (e.g., GPUs or
      TPUs) used by the model deployment on the Kubernetes node.
  r	   r
   N)	r   r   r   r   r   r   r   r   acceleratorCountr   r   r   rx   rx     s*     ,Y++Ay7H7H7N7NOr   rx   c                       e Zd ZdZ G d dej
                        Z G d dej
                        Z ej                  dd      Z	 ej                  d      Z ej                  ddd	
      Z ej                  d      Z ej                  d      Z ej                  d      Z ej                  d      Z ej"                  dd
      Z ej                  d      Z ej                  d      Z ej                  d      Z ej                  d      Zy)StandardQueryParametersa4  Query parameters accepted by all methods.

  Enums:
    FXgafvValueValuesEnum: V1 error format.
    AltValueValuesEnum: Data format for response.

  Fields:
    f__xgafv: V1 error format.
    access_token: OAuth access token.
    alt: Data format for response.
    callback: JSONP
    fields: Selector specifying which fields to include in a partial response.
    key: API key. Your API key identifies your project and provides you with
      API access, quota, and reports. Required unless you provide an OAuth 2.0
      token.
    oauth_token: OAuth 2.0 token for the current user.
    prettyPrint: Returns response with indentations and line breaks.
    quotaUser: Available to use for quota purposes for server-side
      applications. Can be any arbitrary string assigned to a user, but should
      not exceed 40 characters.
    trace: A tracing token of the form "token:<tokenid>" to include in api
      requests.
    uploadType: Legacy upload protocol for media (e.g. "media", "multipart").
    upload_protocol: Upload protocol for media (e.g. "raw", "multipart").
  c                       e Zd ZdZdZdZdZy)*StandardQueryParameters.AltValueValuesEnumzData format for response.

    Values:
      json: Responses with Content-Type of application/json
      media: Media download with context-dependent Content-Type
      proto: Responses with Content-Type of application/x-protobuf
    r   r	   r   N)r   r   r   r   jsonmediaprotor   r   r   AltValueValuesEnumr     s     DEEr   r   c                       e Zd ZdZdZdZy)-StandardQueryParameters.FXgafvValueValuesEnumzVV1 error format.

    Values:
      _1: v1 error format
      _2: v2 error format
    r   r	   N)r   r   r   r   _1_2r   r   r   FXgafvValueValuesEnumr     s     
B	
Br   r   r	   r   r   r   )defaultr   r=   r?         T	   
         N)r   r   r   r   r   Enumr   r   	EnumFieldf__xgafvr$   access_tokenaltcallbackfieldskeyoauth_tokenBooleanFieldprettyPrint	quotaUsertrace
uploadTypeupload_protocolr   r   r   r   r     s   4
9>> 
inn  !Y  !8!<(&&&q),	0!VD#"Y""1%( 9  #&	a #%	%%a(+&	&&q$7+#i##A&)
)


#%$y$$R(*)I))"-/r   r   c                   X    e Zd ZdZ ej
                  d      Z ej
                  d      Zy)rM   a  Storage configuration for a model deployment.

  Fields:
    modelBucketUri: Optional. The Google Cloud Storage bucket URI to load the
      model from. This URI must point to the directory containing the model's
      config file (`config.json`) and model weights. A tuned GCSFuse setup can
      improve LLM Pod startup time by more than 7x. Expected format: `gs:///`.
    xlaCacheBucketUri: Optional. The URI for the GCS bucket containing the XLA
      compilation cache. If using TPUs, the XLA cache will be written to the
      same path as `model_bucket_uri`. This can speed up vLLM model
      preparation for repeated deployments.
  r	   r   N)r   r   r   r   r   r$   modelBucketUrixlaCacheBucketUrir   r   r   rM   rM     s.     )9((+.+i++A.r   rM   c                       e Zd ZdZ ej
                  dej                  j                        Z ej
                  dej                  j                        Z	y)rh   zRepresents a range of throughput values in tokens per second.

  Fields:
    max: Output only. The maximum value of the range.
    min: Output only. The minimum value of the range.
  r	   r
   r   Nrb   r   r   r   rh   rh   (  re   r   rh   r   z$.xgafvr   1r   2N)&r   
__future__r   apitools.base.protorpcliter   r   apitools.base.pyr   packageMessager   r   r'   r,   r2   r6   r9   r<   rG   rL   rR   rW   rY   r[   rS   ra   r(   rH   r>   rq   r-   rx   r   rM   rh   AddCustomJsonFieldMappingAddCustomJsonEnumMappingr   r   r   r   <module>r      sC   ' < % $Y $$*9 *2*9#4#4 **@I$5$5 @+y'8'8 + +	 1 1 + +)++ + $Q9,, $QN@I-- @*=y'8'8 =<-	(9(9 -('I4E4E ':'Y->-> '0'y'8'8 '("** "	Cy(( 	C0i'' 0*<y(( <*Vi// V&Py(( P6)i )0PI%% P<.i// <.~/I%% /$	C9,, 	C # " "Z4 ! ! !114> ! ! !114>r   