Documentation
¶
Overview ¶
Copyright 2021 IBM Corporation
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
Copyright 2021 IBM Corporation ¶
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
Copyright 2021 IBM Corporation ¶
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
Copyright 2021 IBM Corporation ¶
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
Copyright 2021 IBM Corporation ¶
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
Copyright 2021 IBM Corporation ¶
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
Index ¶
- Variables
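- type AdapterConfiguration
- func GetAdapterConfigurationFromEnv(log logr.Logger) (*AdapterConfiguration, error)
- type TritonAdapterServer
- func NewTritonAdapterServer(runtimePort int, config *AdapterConfiguration, log logr.Logger) *TritonAdapterServer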
- func (s *TritonAdapterServer) LoadModel(ctx context.Context, req *mmesh.LoadModelRequest) (*mmesh.LoadModelResponse, error)
- func (s *TritonAdapterServer) RuntimeStatus(ctx context.Context, req *mmesh.RuntimeStatusRequest) (*mmesh.RuntimeStatusResponse, error)
- func (s *TritonAdapterServer) UnloadModel(ctx context.Context, req *mmesh.UnloadModelRequest) (*mmesh.UnloadModelResponse, error)
Constants ¶
This section is empty.
Variables ¶
// TensorType maps tensor type names to the corresponding triton.DataType
// enum values. The keys are the literal string "INVALID" plus the type-name
// constants exported by the modelschema package.
var TensorType = map[string]triton.DataType{
	"INVALID":          triton.DataType_TYPE_INVALID,
	modelschema.BOOL:   triton.DataType_TYPE_BOOL,
	modelschema.UINT8:  triton.DataType_TYPE_UINT8,
	modelschema.UINT16: triton.DataType_TYPE_UINT16,
	modelschema.UINT32: triton.DataType_TYPE_UINT32,
	modelschema.UINT64: triton.DataType_TYPE_UINT64,
	modelschema.INT8:   triton.DataType_TYPE_INT8,
	modelschema.INT16:  triton.DataType_TYPE_INT16,
	modelschema.INT32:  triton.DataType_TYPE_INT32,
	modelschema.INT64:  triton.DataType_TYPE_INT64,
	modelschema.FP16:   triton.DataType_TYPE_FP16,
	modelschema.FP32:   triton.DataType_TYPE_FP32,
	modelschema.FP64:   triton.DataType_TYPE_FP64,
	modelschema.STRING: triton.DataType_TYPE_STRING,
}
Functions ¶
This section is empty.
Types ¶
type AdapterConfiguration ¶
// AdapterConfiguration groups the settings used by the Triton adapter.
// GetAdapterConfigurationFromEnv returns a pointer to one, and
// NewTritonAdapterServer accepts one as its config argument.
//
// NOTE(review): the per-field notes below are inferred from the field names
// only; confirm against the code that reads them.
type AdapterConfiguration struct {
// Port is presumably the port the adapter itself listens on — confirm.
Port int
// TritonPort is presumably the port of the Triton server — confirm.
TritonPort int
// Memory-related settings; the names suggest values expressed in bytes.
TritonContainerMemReqBytes int
TritonMemBufferBytes int
CapacityInBytes int
// MaxLoadingConcurrency presumably bounds concurrent model loads — confirm.
MaxLoadingConcurrency int
// ModelLoadingTimeoutMS is presumably a timeout in milliseconds — confirm.
ModelLoadingTimeoutMS int
// DefaultModelSizeInBytes and ModelSizeMultiplier presumably feed model
// size estimation — confirm against their use.
DefaultModelSizeInBytes int
ModelSizeMultiplier float64
RuntimeVersion string
LimitModelConcurrency int // 0 means no limit (default)
// RootModelDir is presumably the directory under which models are stored — confirm.
RootModelDir string
// UseEmbeddedPuller presumably toggles use of the adapter's own puller — confirm.
UseEmbeddedPuller bool
}
func GetAdapterConfigurationFromEnv ¶
func GetAdapterConfigurationFromEnv(log logr.Logger) (*AdapterConfiguration, error)
type TritonAdapterServer ¶
// TritonAdapterServer bundles a Triton gRPC inference client, a gRPC client
// connection, a model puller, the adapter configuration and a logger.
// It embeds mmesh.UnimplementedModelRuntimeServer and declares LoadModel,
// UnloadModel and RuntimeStatus methods.
type TritonAdapterServer struct {
// Client is the Triton gRPC inference service client.
Client triton.GRPCInferenceServiceClient
// Conn is a gRPC client connection; presumably the one backing Client — confirm.
Conn *grpc.ClientConn
// Puller is the model puller; how it is used is not visible here.
Puller *puller.Puller
// AdapterConfig points to the adapter settings.
AdapterConfig *AdapterConfiguration
// Log is the logger carried by the server.
Log logr.Logger
// embed generated Unimplemented type for forward-compatibility for gRPC
mmesh.UnimplementedModelRuntimeServer
}
func NewTritonAdapterServer ¶
func NewTritonAdapterServer(runtimePort int, config *AdapterConfiguration, log logr.Logger) *TritonAdapterServer
func (*TritonAdapterServer) LoadModel ¶
func (s *TritonAdapterServer) LoadModel(ctx context.Context, req *mmesh.LoadModelRequest) (*mmesh.LoadModelResponse, error)
func (*TritonAdapterServer) RuntimeStatus ¶
func (s *TritonAdapterServer) RuntimeStatus(ctx context.Context, req *mmesh.RuntimeStatusRequest) (*mmesh.RuntimeStatusResponse, error)
func (*TritonAdapterServer) UnloadModel ¶
func (s *TritonAdapterServer) UnloadModel(ctx context.Context, req *mmesh.UnloadModelRequest) (*mmesh.UnloadModelResponse, error)