Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix bug when unpacking a subfolder from an archive #266

Merged
merged 7 commits into from
Oct 11, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Refactor the test function to make it a bit simpler
Move the checking of logs and logic for expected paths and log messages
to their own functions.
  • Loading branch information
leouieda committed Oct 1, 2021
commit 68e49cf7d3cd38c9eff3731df66ca65e66a792c7
1 change: 0 additions & 1 deletion pooch/processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
import gzip
import lzma
import shutil
from pathlib import Path
from zipfile import ZipFile
from tarfile import TarFile

Expand Down
75 changes: 45 additions & 30 deletions pooch/tests/test_processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,8 @@ def test_extractprocessor_fails():
@pytest.mark.parametrize(
"archive,members",
[
("store", None),
("tiny-data", ["tiny-data.txt"]),
("store", None),
("store", ["store/tiny-data.txt"]),
("store", ["store/subdir/tiny-data.txt"]),
("store", ["store/subdir"]),
Expand All @@ -146,46 +146,61 @@ def test_unpacking(processor_class, extension, target_path, archive, members):
processor = processor_class(members=members, extract_dir=target_path)
if target_path is None:
target_path = archive + extension + processor.suffix
with TemporaryDirectory() as local_store:
path = Path(local_store)
# Generate the appropriate expected paths and log message depending on
# the parameters for the test
if archive == "tiny-data":
true_paths = {str(path / target_path / "tiny-data.txt")}
log_lines = ["Extracting 'tiny-data.txt'"]
elif archive == "store" and members is None:
true_paths = {
str(path / target_path / "store" / "tiny-data.txt"),
str(path / target_path / "store" / "subdir" / "tiny-data.txt"),
}
name = processor_class.__name__
log_lines = [f"{name}{name[-1]}ing contents"]
elif archive == "store" and members is not None:
true_paths = []
for member in members:
true_path = path / target_path / Path(*member.split("/"))
if not str(true_path).endswith("tiny-data.txt"):
true_path = true_path / "tiny-data.txt"
true_paths.append(str(true_path))
true_paths = set(true_paths)
log_lines = [f"Extracting '{member}'" for member in members]
with TemporaryDirectory() as path:
path = Path(path)
true_paths, expected_log = _unpacking_expected_paths_and_logs(
archive, members, path / target_path, processor_class.__name__
)
# Setup a pooch in a temp dir
pup = Pooch(path=path, base_url=BASEURL, registry=REGISTRY)
# Capture logs and check for the right processor message
with capture_log() as log_file:
fnames = pup.fetch(archive + extension, processor=processor)
assert set(fnames) == true_paths
lines = log_file.getvalue().splitlines()
assert len(lines) == len(log_lines) + 1
assert lines[0].split()[0] == "Downloading"
for i, log_line in enumerate(log_lines):
assert lines[i + 1].startswith(log_line)
_check_logs(log_file, expected_log)
for fname in fnames:
check_tiny_data(fname)
# Check that processor doesn't execute when not downloading
with capture_log() as log_file:
fnames = pup.fetch(archive + extension, processor=processor)
assert set(fnames) == true_paths
assert log_file.getvalue() == ""
_check_logs(log_file, [])
for fname in fnames:
check_tiny_data(fname)


def _check_logs(log_file, expected_lines):
"""
Assert that the lines in the log match the expected ones.
"""
lines = log_file.getvalue().splitlines()
assert len(lines) == len(expected_lines)
for line, expected_line in zip(lines, expected_lines):
assert line.startswith(expected_line)


def _unpacking_expected_paths_and_logs(archive, members, path, name):
"""
Generate the appropriate expected paths and log message depending on the
parameters for the test.
"""
log_lines = ["Downloading"]
if archive == "tiny-data":
true_paths = {str(path / "tiny-data.txt")}
log_lines.append("Extracting 'tiny-data.txt'")
elif archive == "store" and members is None:
true_paths = {
str(path / "store" / "tiny-data.txt"),
str(path / "store" / "subdir" / "tiny-data.txt"),
}
log_lines.append(f"{name}{name[-1]}ing contents")
elif archive == "store" and members is not None:
true_paths = []
for member in members:
true_path = path / Path(*member.split("/"))
if not str(true_path).endswith("tiny-data.txt"):
true_path = true_path / "tiny-data.txt"
true_paths.append(str(true_path))
log_lines.append(f"Extracting '{member}'")
true_paths = set(true_paths)
return true_paths, log_lines