Skip to content

Commit

Permalink
Simplify zenodo publishing (frictionlessdata#1571)
Browse files Browse the repository at this point in the history
* Simplified zenodo publishing

* Fixed pakcage uploading logic

* Skip zenodo tests on windows

* Fixed zenodo result.url

* Fixed tests
  • Loading branch information
roll committed Jul 18, 2023
1 parent 2c4ea30 commit d138f26
Show file tree
Hide file tree
Showing 5 changed files with 197 additions and 150 deletions.
61 changes: 61 additions & 0 deletions data/cassettes/test_zenodo_adapter_write_without_metadata.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
interactions:
- request:
body: '{}'
headers:
Accept:
- '*/*'
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
Content-Length:
- '2'
Content-Type:
- application/json
User-Agent:
- python-requests/2.31.0
method: POST
uri: https://zenodo.org/api//deposit/depositions
response:
body:
string: '{"message":"The server could not verify that you are authorized to
access the URL requested. You either supplied the wrong credentials (e.g.
a bad password), or your browser doesn''t understand how to supply the credentials
required.","status":401}
'
headers:
Access-Control-Allow-Origin:
- '*'
Access-Control-Expose-Headers:
- Content-Type, ETag, Link, X-RateLimit-Limit, X-RateLimit-Remaining, X-RateLimit-Reset
Content-Length:
- '248'
Content-Type:
- application/json
Date:
- Tue, 18 Jul 2023 16:28:55 GMT
Referrer-Policy:
- strict-origin-when-cross-origin
Retry-After:
- '60'
Server:
- nginx
Strict-Transport-Security:
- max-age=0
X-Content-Type-Options:
- nosniff
X-Frame-Options:
- sameorigin
X-RateLimit-Limit:
- '60'
X-RateLimit-Remaining:
- '59'
X-RateLimit-Reset:
- '1689697796'
X-XSS-Protection:
- 1; mode=block
status:
code: 401
message: UNAUTHORIZED
version: 1
197 changes: 76 additions & 121 deletions frictionless/portals/zenodo/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,19 @@

import datetime
import json
import os
import tempfile
from pathlib import Path
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
from typing import TYPE_CHECKING, Any, Dict, List, Union, cast

from ... import helpers, models
from ... import models
from ...catalog import Catalog, Dataset
from ...exception import FrictionlessException
from ...package import Package
from ...platform import platform
from ...resource import Resource
from ...system import Adapter
from .control import ZenodoControl
from .models import ZenodoCreator, ZenodoMetadata

if TYPE_CHECKING:
from pyzenodo3 import Record # type: ignore
Expand Down Expand Up @@ -52,83 +52,95 @@ def read_package(self) -> Package:

def write_package(self, package: Package):
client = platform.pyzenodo3_upload
client.BASE_URL = self.control.base_url

assert self.control.base_url
assert self.control.apikey
client.BASE_URL = self.control.base_url # type: ignore
metafn = self.control.metafn

if not metafn:
meta_data = generate_metadata(package)
with tempfile.NamedTemporaryFile("wt", delete=False) as file:
json.dump(meta_data, file, indent=2)
metafn = file.name

if metafn:
# Check if metadata is a JSON Object
if isinstance(metafn, dict):
meta_data = generate_metadata(metadata=metafn)
with tempfile.NamedTemporaryFile("wt", delete=False) as file:
json.dump(meta_data, file, indent=2)
metafn = file.name
# Ensure api key
if not self.control.apikey:
raise FrictionlessException("Api key is required for zenodo publishing")

try:
# Ensure deposition
deposition_id = self.control.deposition_id
if not deposition_id:
# Create a deposition resource
deposition_id = client.create( # type: ignore
token=self.control.apikey, base_url=self.control.base_url
deposition_id = cast(
int,
client.create(
token=self.control.apikey,
base_url=self.control.base_url,
),
)
metafn = Path(metafn).expanduser()
client.upload_meta( # type: ignore
token=self.control.apikey,
metafn=metafn,
depid=deposition_id, # type: ignore
)

# Process resources
resources: List[Path] = []
for key, resource in enumerate(package.resources):
if resource.data:
resource_file_name = f"{resource.name}.json" or f"resource{key}.json"
resource_path = os.path.join(
self.control.tmp_path or "", resource_file_name
# Generate metadata
if self.control.metafn:
descriptor = json.loads(Path(self.control.metafn).read_text())
meta = descriptor.get("metadata", {})
meta.setdefault("publication_date", str(datetime.date.today()))
meta.setdefault("access_right", "open")
metadata = ZenodoMetadata(**meta)
else:
description = self.control.description or package.description or "About"
license = "CC-BY-4.0"
if package.licenses:
license = package.licenses[0].get("name", license)
metadata = ZenodoMetadata(
title=self.control.title or package.title or "Title",
description=description,
license=license,
publication_date=str(datetime.date.today()),
)
if self.control.author:
metadata.creators.append(
ZenodoCreator(
name=self.control.author,
affiliation=self.control.company,
)
)
resource.to_json(resource_path)
resources.append(Path(resource_path).expanduser())
continue

resource_path = resource.path or ""
if resource_path.startswith(("http://", "https://")):
continue

if resource.basepath:
resource_path = os.path.join(
str(resource.basepath), str(resource.path)
for contributor in package.contributors:
metadata.creators.append(
ZenodoCreator(
name=contributor.get("title", "Title"),
affiliation=contributor.get("organization"),
)
)
resources.append(Path(resource_path).expanduser())
package_path = os.path.join(self.control.tmp_path or "", "datapackage.json")
package.to_json(package_path)

# Upload package and resources
client.upload_data( # type: ignore
token=self.control.apikey,
datafn=Path(package_path).expanduser(),
depid=deposition_id, # type: ignore
base_url=self.control.base_url,
)
for resource_path in resources:
resource_path = Path(resource_path).expanduser()
client.upload_data( # type: ignore
# Upload metadata
with tempfile.NamedTemporaryFile("wt") as file:
data = dict(metadata=metadata.model_dump(exclude_none=True))
json.dump(data, file, indent=2)
file.flush()
client.upload_meta(
token=self.control.apikey,
datafn=resource_path,
depid=deposition_id, # type: ignore
metafn=file.name,
depid=deposition_id,
)

# Upload package
with tempfile.TemporaryDirectory() as dir:
path = Path(dir) / "datapackage.json"
package.to_json(str(path))
client.upload_data(
token=self.control.apikey,
datafn=path,
depid=deposition_id,
base_url=self.control.base_url,
)

# Upload resource
for resource in package.resources:
if resource.normpath and not resource.remote:
client.upload_data(
token=self.control.apikey,
datafn=Path(resource.normpath),
depid=deposition_id,
base_url=self.control.base_url,
)

# Return result
return models.PublishResult(
url=f"https://zenodo.org/record/{deposition_id}",
url=f"https://zenodo.org/deposit/{deposition_id}",
context=dict(deposition_id=deposition_id),
)

except Exception as exception:
note = "Zenodo API error" + repr(exception)
raise FrictionlessException(note)
Expand Down Expand Up @@ -226,60 +238,3 @@ def get_package(record: Record, title: str, formats: List[str]) -> Package: # t
resource = Resource(path=file["key"]) # type: ignore
package.add_resource(resource)
return package


def generate_metadata(
package: Optional[Package] = None, *, metadata: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
meta_data: Union[str, Dict[str, Any], None] = {"metadata": {}}
if not metadata and not package:
note = "Zenodo API Metadata Creation error: Either metadata or package should be provided to generate metadata."
raise FrictionlessException(note)

if metadata:
if (
not metadata.get("title")
or not metadata.get("description")
or not metadata.get("creators")
):
note = "Zenodo API Metadata Creation error: missing title or description or creators."
raise FrictionlessException(note)

meta_data["metadata"] = metadata
if "keywords" not in meta_data["metadata"]:
meta_data["metadata"]["keywords"] = ["frictionlessdata"]

return helpers.remove_non_values(meta_data)

assert package

if not package.title or not package.description or not package.contributors:
note = "Zenodo API Metadata Creation error: Unable to read title or description or contributors from package descriptor."
raise FrictionlessException(note)

meta_data["metadata"] = {
"title": package.title,
"description": package.description,
"publication_date": package.created or str(datetime.datetime.now()),
"upload_type": "dataset",
"access_right": "open",
}
if package.licenses:
meta_data["metadata"]["creators"] = package.licenses[0].get("name") # type: ignore

creators: List[Dict[str, Any]] = []
for contributor in package.contributors:
creators.append(
{
"name": contributor.get("title"),
"affiliation": contributor.get("organization"),
}
)
keywords = package.keywords or []
if "frictionlessdata" not in package.keywords:
keywords.append("frictionlessdata")

if creators:
meta_data["metadata"]["creators"] = creators # type: ignore
meta_data["metadata"]["keywords"] = keywords # type: ignore
return helpers.remove_non_values(meta_data)
13 changes: 11 additions & 2 deletions frictionless/portals/zenodo/control.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,16 @@ class ZenodoControl(Control):
token has to have deposit:write access.
"""

base_url: Optional[str] = BASE_URL
base_url: str = BASE_URL
"""Endpoint for zenodo. By default it is set to live site (https://zenodo.org/api). For testing upload,
we can use sandbox for example, https://sandbox.zenodo.org/api. Sandbox doesnot work for
reading."""

title: Optional[str] = None
description: Optional[str] = None
author: Optional[str] = None
company: Optional[str] = None

bounds: Optional[str] = None
"""Return records filtered by a geolocation bounding box.
For example, (Format bounds=143.37158,-38.99357,146.90918,-37.35269)"""
Expand All @@ -70,6 +75,7 @@ class ZenodoControl(Control):
name: Optional[str] = None
"""Custom name for a catalog or a package. Default name is 'catalog' or 'package'"""

# TODO: remove
metafn: Optional[str] = None
"""Metadata file path for deposition resource. Deposition resource is used for uploading
and editing records on Zenodo."""
Expand Down Expand Up @@ -114,7 +120,10 @@ class ZenodoControl(Control):
"all_versions": {"type": "integer"},
"apikey": {"type": "string"},
"base_url": {"type": "string"},
"bounds": {"type": "string"},
"title": {"type": "string"},
"description": {"type": "string"},
"author": {"type": "string"},
"company": {"type": "string"},
"communities": {"type": "string"},
"deposition_id": {"type": "integer"},
"doi": {"type": "string"},
Expand Down
21 changes: 21 additions & 0 deletions frictionless/portals/zenodo/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from __future__ import annotations

from typing import List, Literal, Optional

from pydantic import BaseModel


class ZenodoCreator(BaseModel):
name: str
affiliation: Optional[str] = None
orcid: Optional[str] = None


class ZenodoMetadata(BaseModel):
title: str
description: str
publication_date: str
license: Optional[str] = None
upload_type: Literal["dataset"] = "dataset"
access_right: Literal["open"] = "open"
creators: List[ZenodoCreator] = []
Loading

0 comments on commit d138f26

Please sign in to comment.