
Commit

[KED-1497] Add in bandit for security scanning as a pre-commit hook (k…
mzjp2 committed Mar 24, 2020
1 parent dea4b67 commit f2760e0
Showing 6 changed files with 22 additions and 14 deletions.
6 changes: 6 additions & 0 deletions .pre-commit-config.yaml
@@ -106,3 +106,9 @@ repos:
         types: [file, python]
         exclude: ^kedro/template/
         entry: isort
+      - id: bandit
+        name: "Bandit security check"
+        language: system
+        types: [file, python]
+        exclude: ^kedro/template/|^tests/
+        entry: bandit -ll
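
The new hook runs the locally installed bandit binary (`language: system`) over staged Python files, skipping the project template and the test suite. The `-ll` flag raises the reporting threshold so that only findings bandit rates medium or high severity fail the hook. A minimal, hypothetical sketch of what that threshold means in practice (the `load_settings` helper is invented for illustration, and the severity labels are bandit's defaults, worth confirming against its documentation):

import pickle

def load_settings(raw: bytes) -> dict:
    # B101 assert_used is a low-severity finding, so `bandit -ll` ignores it.
    assert raw, "empty payload"
    # B301 (pickle deserialisation) is medium severity, so `bandit -ll` reports it
    # unless the line carries a `# nosec` marker, as in the dataset changes below.
    return pickle.loads(raw)
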
2 changes: 1 addition & 1 deletion kedro/extras/datasets/pandas/gbq_dataset.py
@@ -142,7 +142,7 @@ def _describe(self) -> Dict[str, Any]:
         )
 
     def _load(self) -> pd.DataFrame:
-        sql = "select * from {}.{}".format(self._dataset, self._table_name)
+        sql = "select * from {}.{}".format(self._dataset, self._table_name)  # nosec
         return pd.read_gbq(
             sql,
             project_id=self._project_id,
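
The suppressed finding here is B608 (hardcoded SQL expressions): bandit treats any SQL statement assembled with string formatting as a potential injection point. The dataset and table names are presumably supplied through the dataset's configuration rather than by untrusted callers, and table identifiers cannot be passed as bound query parameters anyway, so the line is annotated with `# nosec` to mark the finding as reviewed. A hypothetical sketch of the pattern the check reacts to (the names below are invented, not from the Kedro codebase):

dataset, table = "analytics", "orders"  # illustrative values, e.g. from catalog configuration

# Flagged by B608: SQL assembled via string formatting.
query = "select * from {}.{}".format(dataset, table)

# Identical construction, annotated as reviewed so the pre-commit hook passes.
reviewed_query = "select * from {}.{}".format(dataset, table)  # nosec
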
2 changes: 1 addition & 1 deletion kedro/extras/datasets/pickle/pickle_dataset.py
@@ -154,7 +154,7 @@ def _load(self) -> Any:
         load_path = get_filepath_str(self._get_load_path(), self._protocol)
 
         with self._fs.open(load_path, mode="rb") as fs_file:
-            return pickle.loads(fs_file.read(), **self._load_args)
+            return pickle.loads(fs_file.read(), **self._load_args)  # nosec
 
     def _save(self, data: Any) -> None:
         save_path = get_filepath_str(self._get_save_path(), self._protocol)
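
The `# nosec` on `pickle.loads` silences B301. Unpickling executes instructions embedded in the byte stream, so loading a pickle from an untrusted source can run arbitrary code; the dataset only reads files the user has explicitly configured, hence the suppression. A self-contained illustration of the risk B301 describes (the command is deliberately harmless):

import os
import pickle

class Malicious:
    # pickle calls __reduce__ when serialising; the callable it returns
    # is executed when the payload is later unpickled.
    def __reduce__(self):
        return (os.system, ("echo code executed during unpickling",))

payload = pickle.dumps(Malicious())
pickle.loads(payload)  # runs the os.system call above - the behaviour B301 warns about
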
23 changes: 12 additions & 11 deletions kedro/extras/datasets/spark/spark_hive_dataset.py
@@ -75,24 +75,25 @@ def __init__(
 
     def __enter__(self):
         self._data.createOrReplaceTempView("tmp")
+
         self._spark_session.sql(
-            "create table {stage_database_name}.{stage_table_name} as select * from tmp".format(
-                stage_database_name=self._stage_database_name,
+            "create table {stage_db_name}.{stage_table_name} as select * from tmp".format(  # nosec
+                stage_db_name=self._stage_database_name,
                 stage_table_name=self._stage_table_name,
             )
         ).take(1)
         self.staged_data = self._spark_session.sql(
-            "select * from {stage_database_name}.{stage_table_name}".format(
-                stage_database_name=self._stage_database_name,
+            "select * from {stage_db_name}.{stage_table_name}".format(  # nosec
+                stage_db_name=self._stage_database_name,
                 stage_table_name=self._stage_table_name,
             )
         )
         return self
 
     def __exit__(self, exc_type, exc_val, exc_tb):
         self._spark_session.sql(
-            "drop table {stage_database_name}.{stage_table_name}".format(
-                stage_database_name=self._stage_database_name,
+            "drop table {stage_db_name}.{stage_table_name}".format(
+                stage_db_name=self._stage_database_name,
                 stage_table_name=self._stage_table_name,
             )
         )
@@ -208,12 +209,12 @@ def _get_spark() -> SparkSession:
     def _create_empty_hive_table(self, data):
         data.createOrReplaceTempView("tmp")
         self._get_spark().sql(
-            "create table {database}.{table} select * from tmp limit 1".format(
+            "create table {database}.{table} select * from tmp limit 1".format(  # nosec
                 table=self._table, database=self._database
             )
         )
         self._get_spark().sql(
-            "truncate table {database}.{table}".format(
+            "truncate table {database}.{table}".format(  # nosec
                 database=self._database, table=self._table
             )
         )
@@ -226,7 +227,7 @@ def _load(self) -> DataFrame:
             )
         )
         return self._get_spark().sql(
-            "select * from {database}.{table}".format(
+            "select * from {database}.{table}".format(  # nosec
                 database=self._database, table=self._table
             )
         )
@@ -246,7 +247,7 @@ def _save(self, data: DataFrame) -> None:
     def _insert_save(self, data: DataFrame) -> None:
         data.createOrReplaceTempView("tmp")
         self._get_spark().sql(
-            "insert into {database}.{table} select {columns} from tmp".format(
+            "insert into {database}.{table} select {columns} from tmp".format(  # nosec
                 database=self._database,
                 table=self._table,
                 columns=", ".join(self._table_columns),
@@ -282,7 +283,7 @@ def _overwrite_save(self, data: DataFrame) -> None:
         self._get_spark().sql(
             "truncate table {database}.{table}".format(
                 database=self._database, table=self._table
-            )
+            )  # nosec
         )
         self._insert_save(data)
 
1 change: 1 addition & 0 deletions test_requirements.txt
@@ -2,6 +2,7 @@
 azure-storage-blob>=1.1.0, <2.0
 azure-storage-file>=1.1.0, <2.0
 azure-storage-queue>=1.1.0, <2.0
+bandit>=1.6.2, <2.0
 behave==1.2.6
 biopython>=1.73, <2.0
 black==v19.10.b0; python_version >= '3.6'
2 changes: 1 addition & 1 deletion tools/ipython/ipython_loader.py
@@ -108,7 +108,7 @@ def run_startup_scripts(startup_dir: pathlib.Path):
                 compiled = compile(
                     script.read_text(encoding="utf-8"), str(script), "exec"
                 )
-                exec(compiled, globals())  # pylint: disable=exec-used
+                exec(compiled, globals())  # pylint: disable=exec-used  # nosec
             except Exception as err:  # pylint: disable=broad-except
                 logging.error(
                     "Startup script `%s` failed:\n%s: %s",
