From 5c61660bfef025a01da0ba51aa57def12f138e8a Mon Sep 17 00:00:00 2001 From: grajguru Date: Fri, 4 Aug 2023 13:52:57 +0530 Subject: [PATCH] Solved bugs in finetuning vision yamls. --- .../multiclass-classification/deploy.yaml | 9 ++++++++- ...rs-fridgeobjects-multiclass-classification.sh | 2 +- .../multiclass-classification/prepare_data.py | 7 ++++--- .../multilabel-classification/deploy.yaml | 9 ++++++++- ...rs-fridgeobjects-multilabel-classification.sh | 2 +- .../multilabel-classification/prepare_data.py | 5 +++-- .../image-instance-segmentation/deploy.yaml | 9 ++++++++- ...ection-fridgeobjects-instance-segmentation.sh | 16 ++++++++-------- .../finetune/image-object-detection/deploy.yaml | 9 ++++++++- .../mmdetection-fridgeobjects-detection.sh | 15 ++++++++------- 10 files changed, 57 insertions(+), 26 deletions(-) diff --git a/cli/foundation-models/system/finetune/image-classification/multiclass-classification/deploy.yaml b/cli/foundation-models/system/finetune/image-classification/multiclass-classification/deploy.yaml index b5884aa6cb..13f76e85a3 100644 --- a/cli/foundation-models/system/finetune/image-classification/multiclass-classification/deploy.yaml +++ b/cli/foundation-models/system/finetune/image-classification/multiclass-classification/deploy.yaml @@ -1,4 +1,11 @@ $schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json name: demo instance_type: Standard_DS3_v2 -instance_count: 1 \ No newline at end of file +instance_count: 1 +liveness_probe: + initial_delay: 180 + period: 180 + failure_threshold: 49 + timeout: 299 +request_settings: + request_timeout_ms: 60000 \ No newline at end of file diff --git a/cli/foundation-models/system/finetune/image-classification/multiclass-classification/hftransformers-fridgeobjects-multiclass-classification.sh b/cli/foundation-models/system/finetune/image-classification/multiclass-classification/hftransformers-fridgeobjects-multiclass-classification.sh index 280d6e4042..0d8ed62b94 100644 --- a/cli/foundation-models/system/finetune/image-classification/multiclass-classification/hftransformers-fridgeobjects-multiclass-classification.sh +++ b/cli/foundation-models/system/finetune/image-classification/multiclass-classification/hftransformers-fridgeobjects-multiclass-classification.sh @@ -130,7 +130,7 @@ fi model_version=$(az ml model show --name $aml_registry_model_name --label $model_label --registry-name $registry_name --query version --output tsv) # 4. Prepare data -python prepare_data.py +python prepare_data.py --subscription $subscription_id --group $resource_group_name --workspace $workspace_name # training data train_data="./data/training-mltable-folder" # validation data diff --git a/cli/foundation-models/system/finetune/image-classification/multiclass-classification/prepare_data.py b/cli/foundation-models/system/finetune/image-classification/multiclass-classification/prepare_data.py index 38770fb12c..72ba373066 100644 --- a/cli/foundation-models/system/finetune/image-classification/multiclass-classification/prepare_data.py +++ b/cli/foundation-models/system/finetune/image-classification/multiclass-classification/prepare_data.py @@ -143,7 +143,7 @@ def read_image(image_path): parser = argparse.ArgumentParser(description="Prepare data for image classification") parser.add_argument("--subscription", type=str, help="Subscription ID") - parser.add_argument("--resource_group", type=str, help="Resource group name") + parser.add_argument("--group", type=str, help="Resource group name") parser.add_argument("--workspace", type=str, help="Workspace name") parser.add_argument("--data_path", type=str, default="./data", help="Dataset location") @@ -165,8 +165,9 @@ def read_image(image_path): sample_image = os.path.join(args.data_path, "fridgeObjects", "milk_bottle", "99.jpg") huggingface_request_json = { - "inputs": { - "image": [base64.encodebytes(read_image(sample_image)).decode("utf-8")], + "input_data": { + "columns": ["image"], + "data": [base64.encodebytes(read_image(sample_image)).decode("utf-8")], } } huggingface_request_file_name = "huggingface_sample_request_data.json" diff --git a/cli/foundation-models/system/finetune/image-classification/multilabel-classification/deploy.yaml b/cli/foundation-models/system/finetune/image-classification/multilabel-classification/deploy.yaml index b5884aa6cb..13f76e85a3 100644 --- a/cli/foundation-models/system/finetune/image-classification/multilabel-classification/deploy.yaml +++ b/cli/foundation-models/system/finetune/image-classification/multilabel-classification/deploy.yaml @@ -1,4 +1,11 @@ $schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json name: demo instance_type: Standard_DS3_v2 -instance_count: 1 \ No newline at end of file +instance_count: 1 +liveness_probe: + initial_delay: 180 + period: 180 + failure_threshold: 49 + timeout: 299 +request_settings: + request_timeout_ms: 60000 \ No newline at end of file diff --git a/cli/foundation-models/system/finetune/image-classification/multilabel-classification/hftransformers-fridgeobjects-multilabel-classification.sh b/cli/foundation-models/system/finetune/image-classification/multilabel-classification/hftransformers-fridgeobjects-multilabel-classification.sh index a64330ef55..a4ecd27932 100644 --- a/cli/foundation-models/system/finetune/image-classification/multilabel-classification/hftransformers-fridgeobjects-multilabel-classification.sh +++ b/cli/foundation-models/system/finetune/image-classification/multilabel-classification/hftransformers-fridgeobjects-multilabel-classification.sh @@ -129,7 +129,7 @@ fi model_version=$(az ml model show --name $aml_registry_model_name --label $model_label --registry-name $registry_name --query version --output tsv) # 4. Prepare data -python prepare_data.py +python prepare_data.py --subscription $subscription_id --group $resource_group_name --workspace $workspace_name # training data train_data="./data/training-mltable-folder" # validation data diff --git a/cli/foundation-models/system/finetune/image-classification/multilabel-classification/prepare_data.py b/cli/foundation-models/system/finetune/image-classification/multilabel-classification/prepare_data.py index 99c9878c7c..1cf05eda13 100644 --- a/cli/foundation-models/system/finetune/image-classification/multilabel-classification/prepare_data.py +++ b/cli/foundation-models/system/finetune/image-classification/multilabel-classification/prepare_data.py @@ -170,8 +170,9 @@ def read_image(image_path): sample_image = os.path.join(args.data_path, "multilabelFridgeObjects", "images", "56.jpg") huggingface_request_json = { - "inputs": { - "image": [base64.encodebytes(read_image(sample_image)).decode("utf-8")], + "input_data": { + "columns": ["image"], + "data": [base64.encodebytes(read_image(sample_image)).decode("utf-8")], } } huggingface_request_file_name = "huggingface_sample_request_data.json" diff --git a/cli/foundation-models/system/finetune/image-instance-segmentation/deploy.yaml b/cli/foundation-models/system/finetune/image-instance-segmentation/deploy.yaml index b5884aa6cb..13f76e85a3 100644 --- a/cli/foundation-models/system/finetune/image-instance-segmentation/deploy.yaml +++ b/cli/foundation-models/system/finetune/image-instance-segmentation/deploy.yaml @@ -1,4 +1,11 @@ $schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json name: demo instance_type: Standard_DS3_v2 -instance_count: 1 \ No newline at end of file +instance_count: 1 +liveness_probe: + initial_delay: 180 + period: 180 + failure_threshold: 49 + timeout: 299 +request_settings: + request_timeout_ms: 60000 \ No newline at end of file diff --git a/cli/foundation-models/system/finetune/image-instance-segmentation/mmdetection-fridgeobjects-instance-segmentation.sh b/cli/foundation-models/system/finetune/image-instance-segmentation/mmdetection-fridgeobjects-instance-segmentation.sh index 706f09fea6..1c10b79b76 100644 --- a/cli/foundation-models/system/finetune/image-instance-segmentation/mmdetection-fridgeobjects-instance-segmentation.sh +++ b/cli/foundation-models/system/finetune/image-instance-segmentation/mmdetection-fridgeobjects-instance-segmentation.sh @@ -23,7 +23,7 @@ gpus_per_node=1 # TODO: update the model name once it registered in preview registry # using the latest version of the model - not working yet mmdetection_model_name="mask_rcnn_swin-t-p4-w7_fpn_1x_coco" -model_version=1 +model_label="latest" version=$(date +%s) finetuned_mmdetection_model_name="mask_rcnn_swin-t-p4-w7_fpn_1x_coco_fridge_is" @@ -37,7 +37,6 @@ ds_finetune="./deepspeed_configs/zero1.json" mmdetection_sample_request_data="./mmdetection_sample_request_data.json" # finetuning job parameters -# TODO: update with preview registry component name finetuning_pipeline_component="mmdetection_image_objectdetection_instancesegmentation_pipeline" # Training settings @@ -123,14 +122,15 @@ fi # # 3. Check if the model exists in the registry # # need to confirm model show command works for registries outside the tenant (aka system registry) -if ! az ml model show --name $model_name --version $model_version --registry-name $registry_name +if ! az ml model show --name $mmdetection_model_name --label $model_label --registry-name $registry_name then - echo "Model $mmdetection_model_name:$model_version does not exist in registry $registry_name" + echo "Model $mmdetection_model_name:$model_label does not exist in registry $registry_name" exit 1 fi - +# get the latest model version +model_version=$(az ml model show --name $mmdetection_model_name --label $model_label --registry-name $registry_name --query version --output tsv) # 4. Prepare data -python prepare_data.py +python prepare_data.py --subscription $subscription_id --group $resource_group_name --workspace $workspace_name # training data train_data="./data/training-mltable-folder" @@ -158,10 +158,10 @@ mmdetection_parent_job=$( az ml job create \ --file ./mmdetection-fridgeobjects-instance-segmentation-pipeline.yml \ $workspace_info \ --set \ - jobs.mmdetection_model_finetune_job.component=$finetuning_pipeline_component \ + jobs.mmdetection_model_finetune_job.component="azureml://registries/$registry_name/components/$finetuning_pipeline_component/labels/latest" \ inputs.compute_model_import=$compute_cluster_model_import \ inputs.compute_finetune=$compute_cluster_finetune \ - inputs.mlflow_model.path=$mmdetection_model_name \ + inputs.mlflow_model.path="azureml://registries/$registry_name/models/$mmdetection_model_name/versions/$model_version" \ inputs.training_data.path=$train_data \ inputs.validation_data.path=$validation_data ) || { diff --git a/cli/foundation-models/system/finetune/image-object-detection/deploy.yaml b/cli/foundation-models/system/finetune/image-object-detection/deploy.yaml index b5884aa6cb..13f76e85a3 100644 --- a/cli/foundation-models/system/finetune/image-object-detection/deploy.yaml +++ b/cli/foundation-models/system/finetune/image-object-detection/deploy.yaml @@ -1,4 +1,11 @@ $schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json name: demo instance_type: Standard_DS3_v2 -instance_count: 1 \ No newline at end of file +instance_count: 1 +liveness_probe: + initial_delay: 180 + period: 180 + failure_threshold: 49 + timeout: 299 +request_settings: + request_timeout_ms: 60000 \ No newline at end of file diff --git a/cli/foundation-models/system/finetune/image-object-detection/mmdetection-fridgeobjects-detection.sh b/cli/foundation-models/system/finetune/image-object-detection/mmdetection-fridgeobjects-detection.sh index c643539ae0..0319c03155 100644 --- a/cli/foundation-models/system/finetune/image-object-detection/mmdetection-fridgeobjects-detection.sh +++ b/cli/foundation-models/system/finetune/image-object-detection/mmdetection-fridgeobjects-detection.sh @@ -22,7 +22,7 @@ gpus_per_node=1 # TODO: update the model name once it registered in preview registry # using the latest version of the model - not working yet mmdetection_model_name="yolof_r50_c5_8x8_1x_coco" -model_version=1 +model_label="latest" version=$(date +%s) finetuned_mmdetection_model_name="yolof_r50_c5_8x8_1x_coco_fridge_od" @@ -122,14 +122,15 @@ fi # # 3. Check if the model exists in the registry # # need to confirm model show command works for registries outside the tenant (aka system registry) -if ! az ml model show --name $model_name --version $model_version --registry-name $registry_name +if ! az ml model show --name $mmdetection_model_name --label $model_label --registry-name $registry_name then - echo "Model $mmdetection_model_name:$model_version does not exist in registry $registry_name" + echo "Model $mmdetection_model_name:$model_label does not exist in registry $registry_name" exit 1 fi - +# get the latest model version +model_version=$(az ml model show --name $mmdetection_model_name --label $model_label --registry-name $registry_name --query version --output tsv) # 4. Prepare data -python prepare_data.py +python prepare_data.py --subscription $subscription_id --group $resource_group_name --workspace $workspace_name # training data train_data="./data/training-mltable-folder" @@ -157,10 +158,10 @@ mmdetection_parent_job=$( az ml job create \ --file ./mmdetection-fridgeobjects-detection-pipeline.yml \ $workspace_info \ --set \ - jobs.mmdetection_model_finetune_job.component=$finetuning_pipeline_component \ + jobs.mmdetection_model_finetune_job.component="azureml://registries/$registry_name/components/$finetuning_pipeline_component/labels/latest" \ inputs.compute_model_import=$compute_cluster_model_import \ inputs.compute_finetune=$compute_cluster_finetune \ - inputs.mlflow_model.path=$mmdetection_model_name \ + inputs.mlflow_model.path="azureml://registries/$registry_name/models/$mmdetection_model_name/versions/$model_version" \ inputs.training_data.path=$train_data \ inputs.validation_data.path=$validation_data ) || {