Skip to content

Commit

Permalink
Merge pull request #48 from bhklab/development
Browse files Browse the repository at this point in the history
Development
  • Loading branch information
jjjermiah committed Mar 28, 2024
2 parents 1014ee1 + 4ab514c commit e412900
Show file tree
Hide file tree
Showing 8 changed files with 63 additions and 30 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,4 @@ Treatment-Annotation*.Rmd

./*.csv
CCLE_treatmentMetadata.csv
AnnotationGx.code-workspace
3 changes: 3 additions & 0 deletions AnnotationGx.code-workspace
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
"folders": [
{
"path": "."
},
{
"path": "../CoreGx"
}
],
"settings": {
Expand Down
4 changes: 2 additions & 2 deletions R/cellosaurus.R
Original file line number Diff line number Diff line change
Expand Up @@ -107,9 +107,9 @@ mapCell2Accession <- function(

if (query_only) return(lapply(requests, function(req) req$url))

# perform the requests
# Submit requests in parallel via httr2, since Cellosaurus doesn't throttle
.info(funContext, "Performing Cellosaurus queries")
responses <- .perform_request_parallel(requests)
responses <- .perform_request_parallel(requests, progress = "Querying Cellosaurus...")
names(responses) <- as.character(ids) # in case it's a numeric ID like COSMIC IDs
if (raw) return(responses)

Expand Down
30 changes: 21 additions & 9 deletions R/cellosaurus_annotations.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
#'
#' @param accessions The Cellosaurus accession to annotate.
#' @param to A character vector specifying the types of annotations to retrieve. Possible values include "id", "ac", "hi", "sy", "ca", "sx", "ag", "di", "derived-from-site", "misspelling", and "dt".
#'
#' @param query_only A logical value indicating whether to only return the query string.
#' @param raw A logical value indicating whether to return the raw response.
#'
#' @return A data frame containing the annotations for the cell line.
#'
#' @examples
Expand All @@ -14,10 +16,13 @@
#' @export
annotateCellAccession <- function(
accessions,
to = c("id", "ac", "hi", "sy", "ca", "sx", "ag", "di", "derived-from-site", "misspelling", "dt")
to = c("id", "ac", "hi", "sy", "ca", "sx", "ag", "di", "derived-from-site", "misspelling", "dt"),
query_only = FALSE, raw = FALSE
)
{
funContext <- .funContext("annotateCellAccession")

.info(funContext, "Building Cellosaurus requests...")
requests <- parallel::mclapply(accessions, function(accession) {
.build_cellosaurus_request(
query = accession,
Expand All @@ -29,19 +34,26 @@ annotateCellAccession <- function(
query_only = FALSE
)
})

responses <- .perform_request_parallel(requests)

.info(funContext, "Performing Requests...")
responses <- .perform_request_parallel(requests, progress = "Querying Cellosaurus...")
names(responses) <- accessions
responses_dt <- parallel::mclapply(accessions,function(name) {
if(raw) return(responses)

.info(funContext, "Parsing Responses...")
responses_dt <- parallel::mclapply(accessions, function(name) {
resp <- responses[[name]]
.parse_cellosaurus_lines(resp) |>
unlist(recursive = FALSE) |>
.processEntry() |>
.formatSynonyms()
}
) |> data.table::rbindlist(fill = TRUE)

responses_dt
}
)
names(responses_dt) <- accessions


responses_dt <- data.table::rbindlist(responses_dt, fill = TRUE)

return(responses_dt)
}

36 changes: 21 additions & 15 deletions R/pubchem_rest.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@
#' @export
getPubchemCompound <- function(
ids, from = "cid", to = "property", properties = c("Title", "InChIKey"),
raw = FALSE, query_only = FALSE, output = "JSON", ...) {
funContext <- .funContext("getPubchemCompound")

raw = FALSE, query_only = FALSE, output = "JSON", ...
) {

funContext <- .funContext("getPubchemCompound")
to_ <- if (to == "property") {
checkmate::assert_atomic(properties, all.missing = FALSE)
checkmate::assert_character(properties)
Expand All @@ -33,33 +33,37 @@ getPubchemCompound <- function(
to
}

.info(funContext, "Building PubChem REST queries...")
requests <- lapply(ids, function(x) {
.build_pubchem_rest_query(
id = x, domain = "compound", namespace = from, operation = to_, output = output,
raw = raw, query_only = query_only, ...
)
})
if (query_only) {
return(requests)
}
if (query_only) return(requests)

tryCatch({
resps_raw <- httr2::req_perform_sequential(requests, on_error = "continue")
.info(funContext, "Retrieving compound information...")
resps_raw <- httr2::req_perform_sequential(
requests,
on_error = "continue",
progress = "Querying PubCHEM REST API...."
)
names(resps_raw) <- ids
}, error = function(e) {
.err(funContext, " An error occurred while retrieving the compound information:\n", e)
})

.debug(funContext, " Number of responses: ", length(resps_raw))
names(resps_raw) <- ids
if (raw) {
return(resps_raw)
}

if (raw) return(resps_raw)

# Parse the responses
.info(funContext, "Parsing PubChem REST responses...")
resps <- .parse_pubchem_rest_responses(resps_raw)
failed <- sapply(resps_raw, httr2::resp_is_error, USE.NAMES = T)

# filter failed
# if any query failed, return the failed queries as attributes
failed <- sapply(resps_raw, httr2::resp_is_error, USE.NAMES = T)
if (any(failed)) {
.warn(funContext, " Some queries failed. See the 'failed' object for details.")
failures <- lapply(resps_raw[failed], function(resp) {
Expand All @@ -69,7 +73,9 @@ getPubchemCompound <- function(
failures <- NULL
}

if (from != "name") {
# Combine the responses
# TODO: the else branch alone might be sufficient here -- verify before simplifying
if (from != "name") {
responses <- data.table::rbindlist(resps, fill = TRUE)
} else {
responses <- data.table::rbindlist(resps, idcol = from, fill = TRUE)
Expand All @@ -78,7 +84,7 @@ getPubchemCompound <- function(

attributes(responses)$failed <- failures

responses
return(responses)
}


Expand Down
9 changes: 7 additions & 2 deletions R/pubchem_view_helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,14 @@
) {
funContext <- .funContext(".build_pubchem_view_query")


# Check the inputs
checkmate::assert_choice(
annotation,
c("data", "index", "annotations", "categories", "neighbors", "literature", "structure", "image", "qr", "linkout")
)
checkmate::assert_choice(record, c("compound", "substance", "assay", "cell", "gene", "protein"))

# Configure the options for the query
opts_ <- list()
if (!is.null(heading)) {
if (record == "substance") {
Expand Down Expand Up @@ -95,7 +96,11 @@

url |>
httr2::url_build() |>
.build_request()
httr2::request()

# url |>
# httr2::url_build() |>
# .build_request()
}

#' Generic function to parse one of the annotation helpers
Expand Down
2 changes: 1 addition & 1 deletion _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -106,14 +106,14 @@ articles:
navbar: Annotating Samples
contents:
- Cellosaurus
- OncoTree

- title: Compounds
navbar: Annotating Compounds
contents:
- PubChemAPI
- Unichem
- ChEMBL
- OncoTree

- title: Pipelines
navbar: Pipeline Tutorials
Expand Down
8 changes: 7 additions & 1 deletion man/annotateCellAccession.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit e412900

Please sign in to comment.