Skip to content

Commit

Permalink
Automation test for spark CLI samples (Azure#2377)
Browse files Browse the repository at this point in the history
* Enable test for submit_spark_standalone_jobs

* Generate workflow yaml

* update spark job files for automation test

* Add workflow for serverless spark with user identity job

* Add scripts to upload input data

* Update workflow to refer the script

* Update source file path

* Update workflow with correct file path

* Update working directory

* Update workflow

* Update the path

* Update the script to upload data

* Update the overwrite mode

* Update destination blob name

* Use blob upload batch

* Add spark pipeline tests

* Update spark component extension

* Add script to attach UAI

* Update property name in workflow

* Update script parameters

* Update assign uai script

* Format the script

* Update setup identities script

* Update path to infra bootstrapping

* Enable automation test for attached spark job

* Update resource path

* Update setup attached resource script

* Update script of setup resources

* Update setup attached resource script2

* Add logic to assign identity role

* Format the empty check

* Check if identity is empty

* Update to get compute properties

* update readme

* Reformat the script

* Update schema location and revert sdk notebook changes

* Attach pool first

* Rename resources and merge main

* Update format in yml

* Add role assignment to uid
  • Loading branch information
fredms committed Jul 6, 2023
1 parent 2cee822 commit 0b829b9
Show file tree
Hide file tree
Showing 38 changed files with 910 additions and 57 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# This code is autogenerated.
# Code is generated by running custom script: python3 readme.py
# Any manual changes to this file may cause incorrect behavior.
# Any manual changes will be overwritten if the code is regenerated.

name: cli-jobs-spark-attached-spark-pipeline-default-identity
# Triggers: manual dispatch, a cron schedule, and pull requests against main
# that touch any of the listed paths.
on:
  workflow_dispatch:
  schedule:
    - cron: "30 9/12 * * *"
  pull_request:
    branches:
      - main
    paths:
      - cli/jobs/spark/**
      - infra/bootstrapping/**
      - .github/workflows/cli-jobs-spark-attached-spark-pipeline-default-identity.yml
      - cli/jobs/spark/data/titanic.csv
      - cli/setup.sh
# One active run per workflow and pull request (or ref); a newer run cancels
# the one that is still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: check out repo
        uses: actions/checkout@v2
      - name: azure login
        uses: azure/login@v1
        with:
          creds: ${{secrets.AZUREML_CREDENTIALS}}
      # Bootstrapping is mandatory: a failure here fails the job.
      - name: bootstrap resources
        run: |
          echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
          bash bootstrap.sh
        working-directory: infra/bootstrapping
        continue-on-error: false
      # The setup steps below are best-effort: continue-on-error lets the job
      # proceed to "run job" even when one of them fails.
      - name: setup-cli
        run: |
          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
          bash setup.sh
        working-directory: cli
        continue-on-error: true
      - name: upload data
        run: |
          bash -x upload-data-to-blob.sh jobs/spark/
        working-directory: cli
        continue-on-error: true
      # Runs the attach script with the compute definition
      # resources/compute/attached-spark.yml.
      - name: setup attached spark
        working-directory: cli
        continue-on-error: true
        run: |
          bash -x jobs/spark/setup-attached-resources.sh resources/compute/attached-spark.yml
      # No continue-on-error here, so the outcome of this step decides the
      # result of the job.
      - name: run job
        run: |
          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
          bash -x ../../run-job.sh attached-spark-pipeline-default-identity.yml
        working-directory: cli/jobs/spark
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# This code is autogenerated.
# Code is generated by running custom script: python3 readme.py
# Any manual changes to this file may cause incorrect behavior.
# Any manual changes will be overwritten if the code is regenerated.

name: cli-jobs-spark-attached-spark-pipeline-managed-identity
# Triggers: manual dispatch, a cron schedule, and pull requests against main
# that touch any of the listed paths.
on:
  workflow_dispatch:
  schedule:
    - cron: "43 7/12 * * *"
  pull_request:
    branches:
      - main
    paths:
      - cli/jobs/spark/**
      - infra/bootstrapping/**
      - .github/workflows/cli-jobs-spark-attached-spark-pipeline-managed-identity.yml
      - cli/jobs/spark/data/titanic.csv
      - cli/setup.sh
# One active run per workflow and pull request (or ref); a newer run cancels
# the one that is still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: check out repo
        uses: actions/checkout@v2
      - name: azure login
        uses: azure/login@v1
        with:
          creds: ${{secrets.AZUREML_CREDENTIALS}}
      # Bootstrapping is mandatory: a failure here fails the job.
      - name: bootstrap resources
        run: |
          echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
          bash bootstrap.sh
        working-directory: infra/bootstrapping
        continue-on-error: false
      # The setup steps below are best-effort: continue-on-error lets the job
      # proceed to "run job" even when one of them fails.
      - name: setup-cli
        run: |
          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
          bash setup.sh
        working-directory: cli
        continue-on-error: true
      - name: upload data
        run: |
          bash -x upload-data-to-blob.sh jobs/spark/
        working-directory: cli
        continue-on-error: true
      # Extra step of the managed-identity variants; it runs before the
      # attached compute is set up.
      - name: setup identities
        run: |
          bash -x setup-identities.sh
        working-directory: cli/jobs/spark
        continue-on-error: true
      # Runs the attach script with the compute definition
      # resources/compute/attached-spark-system-identity.yml.
      - name: setup attached spark
        working-directory: cli
        continue-on-error: true
        run: |
          bash -x jobs/spark/setup-attached-resources.sh resources/compute/attached-spark-system-identity.yml
      # No continue-on-error here, so the outcome of this step decides the
      # result of the job.
      - name: run job
        run: |
          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
          bash -x ../../run-job.sh attached-spark-pipeline-managed-identity.yml
        working-directory: cli/jobs/spark
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# This code is autogenerated.
# Code is generated by running custom script: python3 readme.py
# Any manual changes to this file may cause incorrect behavior.
# Any manual changes will be overwritten if the code is regenerated.

name: cli-jobs-spark-attached-spark-pipeline-user-identity
# Triggers: manual dispatch, a cron schedule, and pull requests against main
# that touch any of the listed paths.
on:
  workflow_dispatch:
  schedule:
    - cron: "15 4/12 * * *"
  pull_request:
    branches:
      - main
    paths:
      - cli/jobs/spark/**
      - infra/bootstrapping/**
      - .github/workflows/cli-jobs-spark-attached-spark-pipeline-user-identity.yml
      - cli/jobs/spark/data/titanic.csv
      - cli/setup.sh
# One active run per workflow and pull request (or ref); a newer run cancels
# the one that is still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: check out repo
        uses: actions/checkout@v2
      - name: azure login
        uses: azure/login@v1
        with:
          creds: ${{secrets.AZUREML_CREDENTIALS}}
      # Bootstrapping is mandatory: a failure here fails the job.
      - name: bootstrap resources
        run: |
          echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
          bash bootstrap.sh
        working-directory: infra/bootstrapping
        continue-on-error: false
      # The setup steps below are best-effort: continue-on-error lets the job
      # proceed to "run job" even when one of them fails.
      - name: setup-cli
        run: |
          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
          bash setup.sh
        working-directory: cli
        continue-on-error: true
      - name: upload data
        run: |
          bash -x upload-data-to-blob.sh jobs/spark/
        working-directory: cli
        continue-on-error: true
      # Runs the attach script with the compute definition
      # resources/compute/attached-spark-user-identity.yml.
      - name: setup attached spark
        working-directory: cli
        continue-on-error: true
        run: |
          bash -x jobs/spark/setup-attached-resources.sh resources/compute/attached-spark-user-identity.yml
      # No continue-on-error here, so the outcome of this step decides the
      # result of the job.
      - name: run job
        run: |
          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
          bash -x ../../run-job.sh attached-spark-pipeline-user-identity.yml
        working-directory: cli/jobs/spark
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# This code is autogenerated.
# Code is generated by running custom script: python3 readme.py
# Any manual changes to this file may cause incorrect behavior.
# Any manual changes will be overwritten if the code is regenerated.

name: cli-jobs-spark-attached-spark-standalone-default-identity
# Triggers: manual dispatch, a cron schedule, and pull requests against main
# that touch any of the listed paths.
on:
  workflow_dispatch:
  schedule:
    - cron: "15 0/12 * * *"
  pull_request:
    branches:
      - main
    paths:
      - cli/jobs/spark/**
      - infra/bootstrapping/**
      - .github/workflows/cli-jobs-spark-attached-spark-standalone-default-identity.yml
      - cli/jobs/spark/data/titanic.csv
      - cli/setup.sh
# One active run per workflow and pull request (or ref); a newer run cancels
# the one that is still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: check out repo
        uses: actions/checkout@v2
      - name: azure login
        uses: azure/login@v1
        with:
          creds: ${{secrets.AZUREML_CREDENTIALS}}
      # Bootstrapping is mandatory: a failure here fails the job.
      - name: bootstrap resources
        run: |
          echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
          bash bootstrap.sh
        working-directory: infra/bootstrapping
        continue-on-error: false
      # The setup steps below are best-effort: continue-on-error lets the job
      # proceed to "run job" even when one of them fails.
      - name: setup-cli
        run: |
          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
          bash setup.sh
        working-directory: cli
        continue-on-error: true
      - name: upload data
        run: |
          bash -x upload-data-to-blob.sh jobs/spark/
        working-directory: cli
        continue-on-error: true
      # Runs the attach script with the compute definition
      # resources/compute/attached-spark.yml.
      - name: setup attached spark
        working-directory: cli
        continue-on-error: true
        run: |
          bash -x jobs/spark/setup-attached-resources.sh resources/compute/attached-spark.yml
      # No continue-on-error here, so the outcome of this step decides the
      # result of the job.
      - name: run job
        run: |
          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
          bash -x ../../run-job.sh attached-spark-standalone-default-identity.yml
        working-directory: cli/jobs/spark
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# This code is autogenerated.
# Code is generated by running custom script: python3 readme.py
# Any manual changes to this file may cause incorrect behavior.
# Any manual changes will be overwritten if the code is regenerated.

name: cli-jobs-spark-attached-spark-standalone-managed-identity
# Triggers: manual dispatch, a cron schedule, and pull requests against main
# that touch any of the listed paths.
on:
  workflow_dispatch:
  schedule:
    - cron: "16 1/12 * * *"
  pull_request:
    branches:
      - main
    paths:
      - cli/jobs/spark/**
      - infra/bootstrapping/**
      - .github/workflows/cli-jobs-spark-attached-spark-standalone-managed-identity.yml
      - cli/jobs/spark/data/titanic.csv
      - cli/setup.sh
# One active run per workflow and pull request (or ref); a newer run cancels
# the one that is still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: check out repo
        uses: actions/checkout@v2
      - name: azure login
        uses: azure/login@v1
        with:
          creds: ${{secrets.AZUREML_CREDENTIALS}}
      # Bootstrapping is mandatory: a failure here fails the job.
      - name: bootstrap resources
        run: |
          echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
          bash bootstrap.sh
        working-directory: infra/bootstrapping
        continue-on-error: false
      # The setup steps below are best-effort: continue-on-error lets the job
      # proceed to "run job" even when one of them fails.
      - name: setup-cli
        run: |
          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
          bash setup.sh
        working-directory: cli
        continue-on-error: true
      - name: upload data
        run: |
          bash -x upload-data-to-blob.sh jobs/spark/
        working-directory: cli
        continue-on-error: true
      # Extra step of the managed-identity variants; it runs before the
      # attached compute is set up.
      - name: setup identities
        run: |
          bash -x setup-identities.sh
        working-directory: cli/jobs/spark
        continue-on-error: true
      # Runs the attach script with the compute definition
      # resources/compute/attached-spark-system-identity.yml.
      - name: setup attached spark
        working-directory: cli
        continue-on-error: true
        run: |
          bash -x jobs/spark/setup-attached-resources.sh resources/compute/attached-spark-system-identity.yml
      # No continue-on-error here, so the outcome of this step decides the
      # result of the job.
      - name: run job
        run: |
          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
          bash -x ../../run-job.sh attached-spark-standalone-managed-identity.yml
        working-directory: cli/jobs/spark
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# This code is autogenerated.
# Code is generated by running custom script: python3 readme.py
# Any manual changes to this file may cause incorrect behavior.
# Any manual changes will be overwritten if the code is regenerated.

name: cli-jobs-spark-attached-spark-standalone-user-identity
# Triggers: manual dispatch, a cron schedule, and pull requests against main
# that touch any of the listed paths.
on:
  workflow_dispatch:
  schedule:
    - cron: "7 1/12 * * *"
  pull_request:
    branches:
      - main
    paths:
      - cli/jobs/spark/**
      - infra/bootstrapping/**
      - .github/workflows/cli-jobs-spark-attached-spark-standalone-user-identity.yml
      - cli/jobs/spark/data/titanic.csv
      - cli/setup.sh
# One active run per workflow and pull request (or ref); a newer run cancels
# the one that is still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: check out repo
        uses: actions/checkout@v2
      - name: azure login
        uses: azure/login@v1
        with:
          creds: ${{secrets.AZUREML_CREDENTIALS}}
      # Bootstrapping is mandatory: a failure here fails the job.
      - name: bootstrap resources
        run: |
          echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
          bash bootstrap.sh
        working-directory: infra/bootstrapping
        continue-on-error: false
      # The setup steps below are best-effort: continue-on-error lets the job
      # proceed to "run job" even when one of them fails.
      - name: setup-cli
        run: |
          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
          bash setup.sh
        working-directory: cli
        continue-on-error: true
      - name: upload data
        run: |
          bash -x upload-data-to-blob.sh jobs/spark/
        working-directory: cli
        continue-on-error: true
      # Runs the attach script with the compute definition
      # resources/compute/attached-spark-user-identity.yml.
      - name: setup attached spark
        working-directory: cli
        continue-on-error: true
        run: |
          bash -x jobs/spark/setup-attached-resources.sh resources/compute/attached-spark-user-identity.yml
      # No continue-on-error here, so the outcome of this step decides the
      # result of the job.
      - name: run job
        run: |
          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
          bash -x ../../run-job.sh attached-spark-standalone-user-identity.yml
        working-directory: cli/jobs/spark
Loading

0 comments on commit 0b829b9

Please sign in to comment.