Automation test for spark CLI samples (Azure#2377)

* Enable test for submit_spark_standalone_jobs * Generate workflow yaml * update spark job files for automation test * Add workflow for serverless spark with user identity job * Add scripts to upload input data * Update workflow to refer the script * Update source file path * Update workflow with correct file path * Update working directory * Update workflow * Update the path * Update the script to upload data * Update the overwrite mode * Update destination blob name * Use blob upload batch * Add spark pipeline tests * Update spark component extension * Add script to attach uai * Update property name in workflow * Update script parameters * Update assign uai script * Format the script * Update setup identities script * Update path to infra bootstrapping * Enable automation test for attached spark job * Update resource path * Update setup attached resource script * Update script of setup resources * Update setup attached resource script2 * Add logic to assign identity role * Format the empty check * Check if identity is empty * Update to get compute properties * update readme * Reformat the script * Update schema location and revert sdk notebook changes * Attach pool first * Rename resources and merge main * Update format in yml * Add role assignment to uid
shark1976 · Jul 6, 2023 · 0b829b9 · 0b829b9
1 parent 2cee822
commit 0b829b9
Show file tree

Hide file tree

Showing 38 changed files with 910 additions and 57 deletions.
diff --git a/.github/workflows/cli-jobs-spark-attached-spark-pipeline-default-identity.yml b/.github/workflows/cli-jobs-spark-attached-spark-pipeline-default-identity.yml
@@ -0,0 +1,61 @@
+# This code is autogenerated.
+# Code is generated by running custom script: python3 readme.py
+# Any manual changes to this file may cause incorrect behavior.
+# Any manual changes will be overwritten if the code is regenerated.
+
+name: cli-jobs-spark-attached-spark-pipeline-default-identity
+on:
+  workflow_dispatch:  # manual runs
+  schedule:
+    - cron: "30 9/12 * * *"  # 09:30 and 21:30 UTC (hour 9, then every 12 hours)
+  pull_request:  # only PRs into main that touch one of the paths below
+    branches:
+      - main
+    paths:
+      - cli/jobs/spark/**
+      - infra/bootstrapping/**
+      - .github/workflows/cli-jobs-spark-attached-spark-pipeline-default-identity.yml
+      - cli/jobs/spark/data/titanic.csv  # already matched by cli/jobs/spark/** above
+      - cli/setup.sh
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}  # workflow name + PR number, or the ref when there is no PR
+  cancel-in-progress: true  # a newer run in the same group cancels the one in progress
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+    - name: check out repo
+      uses: actions/checkout@v2
+    - name: azure login
+      uses: azure/login@v1
+      with:
+        creds: ${{secrets.AZUREML_CREDENTIALS}}  # read from the AZUREML_CREDENTIALS secret
+    - name: bootstrap resources  # echoes the concurrency group id, then runs bootstrap.sh
+      run: |
+          echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
+          bash bootstrap.sh
+      working-directory: infra/bootstrapping
+      continue-on-error: false  # a bootstrap failure fails the job
+    - name: setup-cli  # sources the bootstrapping helper scripts, then runs cli/setup.sh
+      run: |
+          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
+          bash setup.sh
+      working-directory: cli
+      continue-on-error: true  # a failure here is tolerated; later steps still run
+    - name: upload data  # passes jobs/spark/ (relative to cli/) to upload-data-to-blob.sh
+      run: |
+          bash -x upload-data-to-blob.sh jobs/spark/
+      working-directory: cli
+      continue-on-error: true  # a failure here is tolerated; later steps still run
+    - name: setup attached spark  # script path and compute YAML path are both relative to cli/
+      working-directory: cli
+      continue-on-error: true  # a failure here is tolerated; the job step still runs
+      run: |
+          bash -x jobs/spark/setup-attached-resources.sh resources/compute/attached-spark.yml
+    - name: run job  # sources the same helpers, then passes the job YAML to run-job.sh
+      run: |
+          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
+          bash -x ../../run-job.sh attached-spark-pipeline-default-identity.yml
+      working-directory: cli/jobs/spark  # so ../../run-job.sh resolves to cli/run-job.sh
diff --git a/.github/workflows/cli-jobs-spark-attached-spark-pipeline-managed-identity.yml b/.github/workflows/cli-jobs-spark-attached-spark-pipeline-managed-identity.yml
@@ -0,0 +1,66 @@
+# This code is autogenerated.
+# Code is generated by running custom script: python3 readme.py
+# Any manual changes to this file may cause incorrect behavior.
+# Any manual changes will be overwritten if the code is regenerated.
+
+name: cli-jobs-spark-attached-spark-pipeline-managed-identity
+on:
+  workflow_dispatch:  # manual runs
+  schedule:
+    - cron: "43 7/12 * * *"  # 07:43 and 19:43 UTC (hour 7, then every 12 hours)
+  pull_request:  # only PRs into main that touch one of the paths below
+    branches:
+      - main
+    paths:
+      - cli/jobs/spark/**
+      - infra/bootstrapping/**
+      - .github/workflows/cli-jobs-spark-attached-spark-pipeline-managed-identity.yml
+      - cli/jobs/spark/data/titanic.csv  # already matched by cli/jobs/spark/** above
+      - cli/setup.sh
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}  # workflow name + PR number, or the ref when there is no PR
+  cancel-in-progress: true  # a newer run in the same group cancels the one in progress
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+    - name: check out repo
+      uses: actions/checkout@v2
+    - name: azure login
+      uses: azure/login@v1
+      with:
+        creds: ${{secrets.AZUREML_CREDENTIALS}}  # read from the AZUREML_CREDENTIALS secret
+    - name: bootstrap resources  # echoes the concurrency group id, then runs bootstrap.sh
+      run: |
+          echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
+          bash bootstrap.sh
+      working-directory: infra/bootstrapping
+      continue-on-error: false  # a bootstrap failure fails the job
+    - name: setup-cli  # sources the bootstrapping helper scripts, then runs cli/setup.sh
+      run: |
+          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
+          bash setup.sh
+      working-directory: cli
+      continue-on-error: true  # a failure here is tolerated; later steps still run
+    - name: upload data  # passes jobs/spark/ (relative to cli/) to upload-data-to-blob.sh
+      run: |
+          bash -x upload-data-to-blob.sh jobs/spark/
+      working-directory: cli
+      continue-on-error: true  # a failure here is tolerated; later steps still run
+    - name: setup identities  # extra step present only in the managed-identity workflows
+      run: |
+          bash -x setup-identities.sh
+      working-directory: cli/jobs/spark  # setup-identities.sh is resolved relative to this directory
+      continue-on-error: true  # a failure here is tolerated; later steps still run
+    - name: setup attached spark  # script path and compute YAML path are both relative to cli/
+      working-directory: cli
+      continue-on-error: true  # a failure here is tolerated; the job step still runs
+      run: |
+          bash -x jobs/spark/setup-attached-resources.sh resources/compute/attached-spark-system-identity.yml
+    - name: run job  # sources the same helpers, then passes the job YAML to run-job.sh
+      run: |
+          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
+          bash -x ../../run-job.sh attached-spark-pipeline-managed-identity.yml
+      working-directory: cli/jobs/spark  # so ../../run-job.sh resolves to cli/run-job.sh
diff --git a/.github/workflows/cli-jobs-spark-attached-spark-pipeline-user-identity.yml b/.github/workflows/cli-jobs-spark-attached-spark-pipeline-user-identity.yml
@@ -0,0 +1,61 @@
+# This code is autogenerated.
+# Code is generated by running custom script: python3 readme.py
+# Any manual changes to this file may cause incorrect behavior.
+# Any manual changes will be overwritten if the code is regenerated.
+
+name: cli-jobs-spark-attached-spark-pipeline-user-identity
+on:
+  workflow_dispatch:  # manual runs
+  schedule:
+    - cron: "15 4/12 * * *"  # 04:15 and 16:15 UTC (hour 4, then every 12 hours)
+  pull_request:  # only PRs into main that touch one of the paths below
+    branches:
+      - main
+    paths:
+      - cli/jobs/spark/**
+      - infra/bootstrapping/**
+      - .github/workflows/cli-jobs-spark-attached-spark-pipeline-user-identity.yml
+      - cli/jobs/spark/data/titanic.csv  # already matched by cli/jobs/spark/** above
+      - cli/setup.sh
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}  # workflow name + PR number, or the ref when there is no PR
+  cancel-in-progress: true  # a newer run in the same group cancels the one in progress
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+    - name: check out repo
+      uses: actions/checkout@v2
+    - name: azure login
+      uses: azure/login@v1
+      with:
+        creds: ${{secrets.AZUREML_CREDENTIALS}}  # read from the AZUREML_CREDENTIALS secret
+    - name: bootstrap resources  # echoes the concurrency group id, then runs bootstrap.sh
+      run: |
+          echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
+          bash bootstrap.sh
+      working-directory: infra/bootstrapping
+      continue-on-error: false  # a bootstrap failure fails the job
+    - name: setup-cli  # sources the bootstrapping helper scripts, then runs cli/setup.sh
+      run: |
+          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
+          bash setup.sh
+      working-directory: cli
+      continue-on-error: true  # a failure here is tolerated; later steps still run
+    - name: upload data  # passes jobs/spark/ (relative to cli/) to upload-data-to-blob.sh
+      run: |
+          bash -x upload-data-to-blob.sh jobs/spark/
+      working-directory: cli
+      continue-on-error: true  # a failure here is tolerated; later steps still run
+    - name: setup attached spark  # script path and compute YAML path are both relative to cli/
+      working-directory: cli
+      continue-on-error: true  # a failure here is tolerated; the job step still runs
+      run: |
+          bash -x jobs/spark/setup-attached-resources.sh resources/compute/attached-spark-user-identity.yml
+    - name: run job  # sources the same helpers, then passes the job YAML to run-job.sh
+      run: |
+          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
+          bash -x ../../run-job.sh attached-spark-pipeline-user-identity.yml
+      working-directory: cli/jobs/spark  # so ../../run-job.sh resolves to cli/run-job.sh
diff --git a/.github/workflows/cli-jobs-spark-attached-spark-standalone-default-identity.yml b/.github/workflows/cli-jobs-spark-attached-spark-standalone-default-identity.yml
@@ -0,0 +1,61 @@
+# This code is autogenerated.
+# Code is generated by running custom script: python3 readme.py
+# Any manual changes to this file may cause incorrect behavior.
+# Any manual changes will be overwritten if the code is regenerated.
+
+name: cli-jobs-spark-attached-spark-standalone-default-identity
+on:
+  workflow_dispatch:  # manual runs
+  schedule:
+    - cron: "15 0/12 * * *"  # 00:15 and 12:15 UTC (hour 0, then every 12 hours)
+  pull_request:  # only PRs into main that touch one of the paths below
+    branches:
+      - main
+    paths:
+      - cli/jobs/spark/**
+      - infra/bootstrapping/**
+      - .github/workflows/cli-jobs-spark-attached-spark-standalone-default-identity.yml
+      - cli/jobs/spark/data/titanic.csv  # already matched by cli/jobs/spark/** above
+      - cli/setup.sh
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}  # workflow name + PR number, or the ref when there is no PR
+  cancel-in-progress: true  # a newer run in the same group cancels the one in progress
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+    - name: check out repo
+      uses: actions/checkout@v2
+    - name: azure login
+      uses: azure/login@v1
+      with:
+        creds: ${{secrets.AZUREML_CREDENTIALS}}  # read from the AZUREML_CREDENTIALS secret
+    - name: bootstrap resources  # echoes the concurrency group id, then runs bootstrap.sh
+      run: |
+          echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
+          bash bootstrap.sh
+      working-directory: infra/bootstrapping
+      continue-on-error: false  # a bootstrap failure fails the job
+    - name: setup-cli  # sources the bootstrapping helper scripts, then runs cli/setup.sh
+      run: |
+          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
+          bash setup.sh
+      working-directory: cli
+      continue-on-error: true  # a failure here is tolerated; later steps still run
+    - name: upload data  # passes jobs/spark/ (relative to cli/) to upload-data-to-blob.sh
+      run: |
+          bash -x upload-data-to-blob.sh jobs/spark/
+      working-directory: cli
+      continue-on-error: true  # a failure here is tolerated; later steps still run
+    - name: setup attached spark  # script path and compute YAML path are both relative to cli/
+      working-directory: cli
+      continue-on-error: true  # a failure here is tolerated; the job step still runs
+      run: |
+          bash -x jobs/spark/setup-attached-resources.sh resources/compute/attached-spark.yml
+    - name: run job  # sources the same helpers, then passes the job YAML to run-job.sh
+      run: |
+          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
+          bash -x ../../run-job.sh attached-spark-standalone-default-identity.yml
+      working-directory: cli/jobs/spark  # so ../../run-job.sh resolves to cli/run-job.sh
diff --git a/.github/workflows/cli-jobs-spark-attached-spark-standalone-managed-identity.yml b/.github/workflows/cli-jobs-spark-attached-spark-standalone-managed-identity.yml
@@ -0,0 +1,66 @@
+# This code is autogenerated.
+# Code is generated by running custom script: python3 readme.py
+# Any manual changes to this file may cause incorrect behavior.
+# Any manual changes will be overwritten if the code is regenerated.
+
+name: cli-jobs-spark-attached-spark-standalone-managed-identity
+on:
+  workflow_dispatch:  # manual runs
+  schedule:
+    - cron: "16 1/12 * * *"  # 01:16 and 13:16 UTC (hour 1, then every 12 hours)
+  pull_request:  # only PRs into main that touch one of the paths below
+    branches:
+      - main
+    paths:
+      - cli/jobs/spark/**
+      - infra/bootstrapping/**
+      - .github/workflows/cli-jobs-spark-attached-spark-standalone-managed-identity.yml
+      - cli/jobs/spark/data/titanic.csv  # already matched by cli/jobs/spark/** above
+      - cli/setup.sh
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}  # workflow name + PR number, or the ref when there is no PR
+  cancel-in-progress: true  # a newer run in the same group cancels the one in progress
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+    - name: check out repo
+      uses: actions/checkout@v2
+    - name: azure login
+      uses: azure/login@v1
+      with:
+        creds: ${{secrets.AZUREML_CREDENTIALS}}  # read from the AZUREML_CREDENTIALS secret
+    - name: bootstrap resources  # echoes the concurrency group id, then runs bootstrap.sh
+      run: |
+          echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
+          bash bootstrap.sh
+      working-directory: infra/bootstrapping
+      continue-on-error: false  # a bootstrap failure fails the job
+    - name: setup-cli  # sources the bootstrapping helper scripts, then runs cli/setup.sh
+      run: |
+          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
+          bash setup.sh
+      working-directory: cli
+      continue-on-error: true  # a failure here is tolerated; later steps still run
+    - name: upload data  # passes jobs/spark/ (relative to cli/) to upload-data-to-blob.sh
+      run: |
+          bash -x upload-data-to-blob.sh jobs/spark/
+      working-directory: cli
+      continue-on-error: true  # a failure here is tolerated; later steps still run
+    - name: setup identities  # extra step present only in the managed-identity workflows
+      run: |
+          bash -x setup-identities.sh
+      working-directory: cli/jobs/spark  # setup-identities.sh is resolved relative to this directory
+      continue-on-error: true  # a failure here is tolerated; later steps still run
+    - name: setup attached spark  # script path and compute YAML path are both relative to cli/
+      working-directory: cli
+      continue-on-error: true  # a failure here is tolerated; the job step still runs
+      run: |
+          bash -x jobs/spark/setup-attached-resources.sh resources/compute/attached-spark-system-identity.yml
+    - name: run job  # sources the same helpers, then passes the job YAML to run-job.sh
+      run: |
+          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
+          bash -x ../../run-job.sh attached-spark-standalone-managed-identity.yml
+      working-directory: cli/jobs/spark  # so ../../run-job.sh resolves to cli/run-job.sh
diff --git a/.github/workflows/cli-jobs-spark-attached-spark-standalone-user-identity.yml b/.github/workflows/cli-jobs-spark-attached-spark-standalone-user-identity.yml
@@ -0,0 +1,61 @@
+# This code is autogenerated.
+# Code is generated by running custom script: python3 readme.py
+# Any manual changes to this file may cause incorrect behavior.
+# Any manual changes will be overwritten if the code is regenerated.
+
+name: cli-jobs-spark-attached-spark-standalone-user-identity
+on:
+  workflow_dispatch:  # manual runs
+  schedule:
+    - cron: "7 1/12 * * *"  # 01:07 and 13:07 UTC (hour 1, then every 12 hours)
+  pull_request:  # only PRs into main that touch one of the paths below
+    branches:
+      - main
+    paths:
+      - cli/jobs/spark/**
+      - infra/bootstrapping/**
+      - .github/workflows/cli-jobs-spark-attached-spark-standalone-user-identity.yml
+      - cli/jobs/spark/data/titanic.csv  # already matched by cli/jobs/spark/** above
+      - cli/setup.sh
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}  # workflow name + PR number, or the ref when there is no PR
+  cancel-in-progress: true  # a newer run in the same group cancels the one in progress
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+    - name: check out repo
+      uses: actions/checkout@v2
+    - name: azure login
+      uses: azure/login@v1
+      with:
+        creds: ${{secrets.AZUREML_CREDENTIALS}}  # read from the AZUREML_CREDENTIALS secret
+    - name: bootstrap resources  # echoes the concurrency group id, then runs bootstrap.sh
+      run: |
+          echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
+          bash bootstrap.sh
+      working-directory: infra/bootstrapping
+      continue-on-error: false  # a bootstrap failure fails the job
+    - name: setup-cli  # sources the bootstrapping helper scripts, then runs cli/setup.sh
+      run: |
+          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
+          bash setup.sh
+      working-directory: cli
+      continue-on-error: true  # a failure here is tolerated; later steps still run
+    - name: upload data  # passes jobs/spark/ (relative to cli/) to upload-data-to-blob.sh
+      run: |
+          bash -x upload-data-to-blob.sh jobs/spark/
+      working-directory: cli
+      continue-on-error: true  # a failure here is tolerated; later steps still run
+    - name: setup attached spark  # script path and compute YAML path are both relative to cli/
+      working-directory: cli
+      continue-on-error: true  # a failure here is tolerated; the job step still runs
+      run: |
+          bash -x jobs/spark/setup-attached-resources.sh resources/compute/attached-spark-user-identity.yml
+    - name: run job  # sources the same helpers, then passes the job YAML to run-job.sh
+      run: |
+          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
+          bash -x ../../run-job.sh attached-spark-standalone-user-identity.yml
+      working-directory: cli/jobs/spark  # so ../../run-job.sh resolves to cli/run-job.sh