Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Solved bugs in finetuning vision yamls. #2528

Merged
merged 1 commit into from
Aug 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,4 +1,11 @@
$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
name: demo
instance_type: Standard_DS3_v2
instance_count: 1
instance_count: 1
liveness_probe:
initial_delay: 180
period: 180
failure_threshold: 49
timeout: 299
request_settings:
request_timeout_ms: 60000
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ fi
model_version=$(az ml model show --name $aml_registry_model_name --label $model_label --registry-name $registry_name --query version --output tsv)

# 4. Prepare data
python prepare_data.py
python prepare_data.py --subscription $subscription_id --group $resource_group_name --workspace $workspace_name
# training data
train_data="./data/training-mltable-folder"
# validation data
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ def read_image(image_path):
parser = argparse.ArgumentParser(description="Prepare data for image classification")

parser.add_argument("--subscription", type=str, help="Subscription ID")
parser.add_argument("--resource_group", type=str, help="Resource group name")
parser.add_argument("--group", type=str, help="Resource group name")
parser.add_argument("--workspace", type=str, help="Workspace name")
parser.add_argument("--data_path", type=str, default="./data", help="Dataset location")

Expand All @@ -165,8 +165,9 @@ def read_image(image_path):

sample_image = os.path.join(args.data_path, "fridgeObjects", "milk_bottle", "99.jpg")
huggingface_request_json = {
"inputs": {
"image": [base64.encodebytes(read_image(sample_image)).decode("utf-8")],
"input_data": {
"columns": ["image"],
"data": [base64.encodebytes(read_image(sample_image)).decode("utf-8")],
}
}
huggingface_request_file_name = "huggingface_sample_request_data.json"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,11 @@
$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
name: demo
instance_type: Standard_DS3_v2
instance_count: 1
instance_count: 1
liveness_probe:
initial_delay: 180
period: 180
failure_threshold: 49
timeout: 299
request_settings:
request_timeout_ms: 60000
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ fi
model_version=$(az ml model show --name $aml_registry_model_name --label $model_label --registry-name $registry_name --query version --output tsv)

# 4. Prepare data
python prepare_data.py
python prepare_data.py --subscription $subscription_id --group $resource_group_name --workspace $workspace_name
# training data
train_data="./data/training-mltable-folder"
# validation data
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,9 @@ def read_image(image_path):

sample_image = os.path.join(args.data_path, "multilabelFridgeObjects", "images", "56.jpg")
huggingface_request_json = {
"inputs": {
"image": [base64.encodebytes(read_image(sample_image)).decode("utf-8")],
"input_data": {
"columns": ["image"],
"data": [base64.encodebytes(read_image(sample_image)).decode("utf-8")],
}
}
huggingface_request_file_name = "huggingface_sample_request_data.json"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,11 @@
$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
name: demo
instance_type: Standard_DS3_v2
instance_count: 1
instance_count: 1
liveness_probe:
initial_delay: 180
period: 180
failure_threshold: 49
timeout: 299
request_settings:
request_timeout_ms: 60000
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ gpus_per_node=1
# TODO: update the model name once it is registered in the preview registry
# using the latest version of the model - not working yet
mmdetection_model_name="mask_rcnn_swin-t-p4-w7_fpn_1x_coco"
model_version=1
model_label="latest"

version=$(date +%s)
finetuned_mmdetection_model_name="mask_rcnn_swin-t-p4-w7_fpn_1x_coco_fridge_is"
Expand All @@ -37,7 +37,6 @@ ds_finetune="./deepspeed_configs/zero1.json"
mmdetection_sample_request_data="./mmdetection_sample_request_data.json"

# finetuning job parameters
# TODO: update with preview registry component name
finetuning_pipeline_component="mmdetection_image_objectdetection_instancesegmentation_pipeline"

# Training settings
Expand Down Expand Up @@ -123,14 +122,15 @@ fi

# # 3. Check if the model exists in the registry
# # need to confirm that the `az ml model show` command works for registries outside the tenant (aka system registry)
if ! az ml model show --name $model_name --version $model_version --registry-name $registry_name
if ! az ml model show --name $mmdetection_model_name --label $model_label --registry-name $registry_name
then
echo "Model $mmdetection_model_name:$model_version does not exist in registry $registry_name"
echo "Model $mmdetection_model_name:$model_label does not exist in registry $registry_name"
exit 1
fi

# get the latest model version
model_version=$(az ml model show --name $mmdetection_model_name --label $model_label --registry-name $registry_name --query version --output tsv)
# 4. Prepare data
python prepare_data.py
python prepare_data.py --subscription $subscription_id --group $resource_group_name --workspace $workspace_name

# training data
train_data="./data/training-mltable-folder"
Expand Down Expand Up @@ -158,10 +158,10 @@ mmdetection_parent_job=$( az ml job create \
--file ./mmdetection-fridgeobjects-instance-segmentation-pipeline.yml \
$workspace_info \
--set \
jobs.mmdetection_model_finetune_job.component=$finetuning_pipeline_component \
jobs.mmdetection_model_finetune_job.component="azureml://registries/$registry_name/components/$finetuning_pipeline_component/labels/latest" \
inputs.compute_model_import=$compute_cluster_model_import \
inputs.compute_finetune=$compute_cluster_finetune \
inputs.mlflow_model.path=$mmdetection_model_name \
inputs.mlflow_model.path="azureml://registries/$registry_name/models/$mmdetection_model_name/versions/$model_version" \
inputs.training_data.path=$train_data \
inputs.validation_data.path=$validation_data
) || {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,11 @@
$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
name: demo
instance_type: Standard_DS3_v2
instance_count: 1
instance_count: 1
liveness_probe:
initial_delay: 180
period: 180
failure_threshold: 49
timeout: 299
request_settings:
request_timeout_ms: 60000
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ gpus_per_node=1
# TODO: update the model name once it is registered in the preview registry
# using the latest version of the model - not working yet
mmdetection_model_name="yolof_r50_c5_8x8_1x_coco"
model_version=1
model_label="latest"

version=$(date +%s)
finetuned_mmdetection_model_name="yolof_r50_c5_8x8_1x_coco_fridge_od"
rjaincc marked this conversation as resolved.
Show resolved Hide resolved
Expand Down Expand Up @@ -122,14 +122,15 @@ fi

# # 3. Check if the model exists in the registry
# # need to confirm that the `az ml model show` command works for registries outside the tenant (aka system registry)
if ! az ml model show --name $model_name --version $model_version --registry-name $registry_name
if ! az ml model show --name $mmdetection_model_name --label $model_label --registry-name $registry_name
then
echo "Model $mmdetection_model_name:$model_version does not exist in registry $registry_name"
echo "Model $mmdetection_model_name:$model_label does not exist in registry $registry_name"
exit 1
fi

# get the latest model version
model_version=$(az ml model show --name $mmdetection_model_name --label $model_label --registry-name $registry_name --query version --output tsv)
# 4. Prepare data
python prepare_data.py
python prepare_data.py --subscription $subscription_id --group $resource_group_name --workspace $workspace_name

# training data
train_data="./data/training-mltable-folder"
Expand Down Expand Up @@ -157,10 +158,10 @@ mmdetection_parent_job=$( az ml job create \
--file ./mmdetection-fridgeobjects-detection-pipeline.yml \
$workspace_info \
--set \
jobs.mmdetection_model_finetune_job.component=$finetuning_pipeline_component \
jobs.mmdetection_model_finetune_job.component="azureml://registries/$registry_name/components/$finetuning_pipeline_component/labels/latest" \
inputs.compute_model_import=$compute_cluster_model_import \
inputs.compute_finetune=$compute_cluster_finetune \
inputs.mlflow_model.path=$mmdetection_model_name \
inputs.mlflow_model.path="azureml://registries/$registry_name/models/$mmdetection_model_name/versions/$model_version" \
inputs.training_data.path=$train_data \
inputs.validation_data.path=$validation_data
) || {
Expand Down