Skip to content

Commit

Permalink
Merge branch 'master' into int-id-redo
Browse files Browse the repository at this point in the history
  • Loading branch information
j-hagedorn committed Jan 16, 2018
2 parents 94755ac + 41168fe commit 61d60c6
Show file tree
Hide file tree
Showing 6 changed files with 103 additions and 175 deletions.
6 changes: 4 additions & 2 deletions global.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,11 @@
due <- as.Date("2017/09/30")

# Load de-identified data
scrub_sis <- read_feather("data/scrub_sis.feather")
scrub_sis <- read_feather("data/scrub_sis.feather")

# Change sis_date from factor to date
scrub_sis$sis_date <- as.Date(scrub_sis$sis_date, format="%Y-%m-%d")

# Get most recent SIS score
most_recent <- max(as.Date(scrub_sis$sis_date)[as.Date(scrub_sis$sis_date) <= Sys.Date()])

Expand Down Expand Up @@ -63,7 +66,6 @@
mutate(HCPCS = as.character(HCPCS))

# Load transformed dfs to break down TOS

q2 <- read_feather("data/q2.feather")
q3 <- read_feather("data/q3.feather")

Expand Down
5 changes: 2 additions & 3 deletions prep/readSIS.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@
sis <-
sis_full %>%
# Filter Status == Completed
filter(statusText %in% c("COMPLETED")
& deleted == "False") %>%
filter(statusText %in% c("COMPLETED")) %>%
# Remove text fields
select(-ends_with("notes")) %>%
# Format datetime fields
Expand Down Expand Up @@ -152,7 +151,7 @@
age, gender, race, ethnic, address, sis_cl_st, # LivingSituation, LivingType,
# Assessment items
Q1A1_ExMedSupport:Q1A21_Other,
Q1B1_ExBehSupport:Q1B15_Other,
Q1B1_ExBehSupport:Q1B13_Other,
Q2A1_TOS:Q2F8_ImportantFor,
Q3A1_TOS:Q3A8_ImportantFor,
Q4A1v1:sis_s44n,
Expand Down
68 changes: 43 additions & 25 deletions prep/scrub.R
Original file line number Diff line number Diff line change
@@ -1,29 +1,47 @@
# scrub.R #

#### Aggregate data to allow presentation in open app
# Script to scrub PHI from SIS data to use with Shiny Apps

library(dplyr)
library(tidyr)
library(car)
# Make an ID key
mcaid_id <- unique(sub_sis$mcaid_id)
sis_key <- data.frame(mcaid_id)
sis_key$fake_id <- sample(x = 100000001:999999999,
size = length(sis_key$mcaid_id),
replace = FALSE)
sis_key$mcaid_id <- as.character(sis_key$mcaid_id)
sis_key$fake_id <- as.character(sis_key$fake_id)
rm(mcaid_id)

# Define dataset without PHI to use with Shiny Apps
# Make PHI-free dataset
scrub_sis <-
sub_sis %>%
mutate(mcaid_id = as.character(mcaid_id)) %>%
left_join(sis_key, by = "mcaid_id") %>%
select(-sis_id, -mcaid_id, -age, -gender, -race, -ethnic,-address,
# Rm comment fields for 'Other' needs areas
-ends_with("_Other")) %>%
droplevels() %>%
# Mutate factor variables (caused by reading in) to numeric
mutate_at(
.vars = vars(
starts_with("scr_"),
starts_with("Q1"),
starts_with("Q2"),
starts_with("Q3")
),
.funs = funs(as.character)
) %>%
mutate_at(
.vars = vars(
starts_with("scr_"),
starts_with("Q1"),
starts_with("Q2"),
starts_with("Q3")
),
.funs = funs(as.numeric)
)

# Make an ID key
mcaid_id <- unique(sub_sis$mcaid_id)
sis_key <- data.frame(mcaid_id)
sis_key$fake_id <- sample(x = 100000001:999999999,
size = length(sis_key$mcaid_id),
replace = FALSE)
sis_key$mcaid_id <- as.character(sis_key$mcaid_id)
sis_key$fake_id <- as.character(sis_key$fake_id)
rm(mcaid_id)

# Make PHI-free dataset
scrub_sis <-
sub_sis %>%
mutate(mcaid_id = as.character(mcaid_id)) %>%
left_join(sis_key, by = "mcaid_id") %>%
select(-sis_id, -mcaid_id, -age, -gender, -address)

# Write SIS Key and Scrubbed data to local workspace
write.csv(sis_key,"data/sis_key.csv", row.names = F)
write.csv(scrub_sis,"data/scrub_sis.csv", row.names = F)
# Write SIS Key and Scrubbed data to local workspace
write.csv(sis_key,"data/sis_key.csv", row.names = F)
library(feather)
write_feather(scrub_sis,"data/scrub_sis.feather")
4 changes: 2 additions & 2 deletions prep/sis_mappings.R
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,8 @@ needs <-
Q1B7_ = "Sexual aggression",
Q1B8_ = "Sexually inappropriate",
Q1B9_ = "Outbursts",
Q1B10_ = "Wandering",
Q1B11_ = "Substance abuse",
Q1B10_ = "Substance abuse",
Q1B11_ = "Wandering",
Q1B12_ = "Mental health",
Q1B13_ = "Other Behavioral (1st)",
Q1B14_ = "Other Behavioral (2nd)",
Expand Down
63 changes: 48 additions & 15 deletions prep/transform.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,16 @@ q2_tos <-
ungroup() %>% droplevels() %>%
select(fake_id,PIHP,agency,sis_date,ends_with("tos")) %>%
select(fake_id,PIHP,agency,sis_date,starts_with("Q2")) %>%
mutate_each(funs(as.character), -fake_id, -PIHP, -agency, -sis_date) %>%
mutate_at(
.funs = funs(as.character),
.vars = vars(starts_with("Q2"))
) %>%
gather(item, tos, Q2A1_TOS:Q2F8_TOS) %>%
rename(type = tos) %>%
mutate(type_n = as.numeric(type),
item = gsub("TOS","",item),
id = as.factor(paste0(fake_id,item))
mutate(
type_n = as.numeric(type),
item = gsub("TOS","",item),
id = as.factor(paste0(fake_id,item))
) %>%
group_by(id, fake_id, PIHP, agency, sis_date, item, type) %>%
summarize(n = n_distinct(id),
Expand All @@ -31,7 +35,10 @@ q2_fqy <-
ungroup() %>% droplevels() %>%
select(fake_id, PIHP, agency, sis_date,ends_with("Fqy")) %>%
select(fake_id, PIHP, agency, sis_date, starts_with("Q2")) %>%
mutate_each(funs(as.character), -fake_id, -PIHP, -agency, -sis_date) %>%
mutate_at(
.funs = funs(as.character),
.vars = vars(starts_with("Q2"))
) %>%
gather(item, fqy, Q2A1_Fqy:Q2F8_Fqy) %>%
rename(frequency = fqy) %>%
mutate(frequency_n = as.numeric(frequency),
Expand All @@ -50,7 +57,10 @@ q2_dst <-
ungroup() %>% droplevels() %>%
select(fake_id, PIHP, agency, sis_date, ends_with("DST")) %>%
select(fake_id, PIHP, agency, sis_date, starts_with("Q2")) %>%
mutate_each(funs(as.character), -fake_id, -PIHP, -agency, -sis_date) %>%
mutate_at(
.funs = funs(as.character),
.vars = vars(starts_with("Q2"))
) %>%
gather(item, dst, Q2A1_DST:Q2F8_DST) %>%
rename(DST = dst) %>%
mutate(DST_n = as.numeric(DST),
Expand All @@ -69,7 +79,10 @@ q2_to <-
ungroup() %>% droplevels() %>%
select(fake_id, PIHP, agency,sis_date,ends_with("ImportantTo")) %>%
select(fake_id, PIHP, agency,sis_date,starts_with("Q2")) %>%
mutate_each(funs(as.character), -fake_id, -PIHP, -agency, -sis_date) %>%
mutate_at(
.funs = funs(as.character),
.vars = vars(starts_with("Q2"))
) %>%
gather(item, import_to, Q2A1_ImportantTo:Q2F8_ImportantTo) %>%
mutate(import_to_n = as.numeric(import_to),
item = gsub("ImportantTo","",item),
Expand All @@ -88,7 +101,10 @@ q2_for <-
ungroup() %>% droplevels() %>%
select(fake_id,PIHP, agency,sis_date,ends_with("ImportantFor")) %>%
select(fake_id,PIHP, agency,sis_date,starts_with("Q2")) %>%
mutate_each(funs(as.character), -fake_id, -PIHP, -agency, -sis_date) %>%
mutate_at(
.funs = funs(as.character),
.vars = vars(starts_with("Q2"))
) %>%
gather(item, import_for, Q2A1_ImportantFor:Q2F8_ImportantFor) %>%
mutate(import_for_n = as.numeric(import_for),
item = gsub("ImportantFor","",item),
Expand Down Expand Up @@ -164,7 +180,10 @@ q3_tos <-
ungroup() %>% droplevels() %>%
select(fake_id,PIHP,agency,sis_date,ends_with("TOS")) %>%
select(fake_id,PIHP,agency,sis_date,starts_with("Q3")) %>%
mutate_each(funs(as.character), -fake_id, -PIHP, -agency, -sis_date) %>%
mutate_at(
.funs = funs(as.character),
.vars = vars(starts_with("Q3"))
) %>%
gather(item, tos, Q3A1_TOS:Q3A8_TOS) %>%
rename(type = tos) %>%
mutate(type_n = as.numeric(type),
Expand All @@ -184,7 +203,10 @@ q3_fqy <-
ungroup() %>% droplevels() %>%
select(fake_id, PIHP, agency, sis_date, ends_with("Fqy")) %>%
select(fake_id, PIHP, agency, sis_date, starts_with("Q3")) %>%
mutate_each(funs(as.character), -fake_id, -PIHP, -agency, -sis_date) %>%
mutate_at(
.funs = funs(as.character),
.vars = vars(starts_with("Q3"))
) %>%
gather(item, fqy, Q3A1_Fqy:Q3A8_Fqy) %>%
rename(frequency = fqy) %>%
mutate(frequency_n = as.numeric(frequency),
Expand All @@ -203,7 +225,10 @@ q3_dst <-
ungroup() %>% droplevels() %>%
select(fake_id, PIHP, agency, sis_date, ends_with("DST")) %>%
select(fake_id, PIHP, agency, sis_date, starts_with("Q3")) %>%
mutate_each(funs(as.character), -fake_id, -PIHP, -agency, -sis_date) %>%
mutate_at(
.funs = funs(as.character),
.vars = vars(starts_with("Q3"))
) %>%
gather(item, dst, Q3A1_DST:Q3A8_DST) %>%
rename(DST = dst) %>%
mutate(DST_n = as.numeric(DST),
Expand All @@ -222,7 +247,10 @@ q3_to <-
ungroup() %>% droplevels() %>%
select(fake_id,PIHP,agency,sis_date,ends_with("ImportantTo")) %>%
select(fake_id,PIHP,agency,sis_date,starts_with("Q3")) %>%
mutate_each(funs(as.character), -fake_id, -PIHP, -agency, -sis_date) %>%
mutate_at(
.funs = funs(as.character),
.vars = vars(starts_with("Q3"))
) %>%
gather(item, import_to, Q3A1_ImportantTo:Q3A8_ImportantTo) %>%
mutate(import_to_n = as.numeric(import_to),
item = gsub("ImportantTo","",item),
Expand All @@ -241,7 +269,10 @@ q3_for <-
ungroup() %>% droplevels() %>%
select(fake_id,PIHP,agency,sis_date,ends_with("ImportantFor")) %>%
select(fake_id,PIHP,agency,sis_date,starts_with("Q3")) %>%
mutate_each(funs(as.character), -fake_id, -PIHP, -agency, -sis_date) %>%
mutate_at(
.funs = funs(as.character),
.vars = vars(starts_with("Q3"))
) %>%
gather(item, import_for, Q3A1_ImportantFor:Q3A8_ImportantFor) %>%
mutate(import_for_n = as.numeric(import_for),
item = gsub("ImportantFor","",item),
Expand Down Expand Up @@ -309,5 +340,7 @@ q3 <-
"Not endorsed")))) %>%
select(-import_to_n, -import_for_n, -n)

saveRDS(q2,"data/q2.rds")
saveRDS(q3,"data/q3.rds")
library(feather)
write_feather(q2,"data/q2.feather")
write_feather(q3,"data/q3.feather")

Loading

0 comments on commit 61d60c6

Please sign in to comment.