Merge pull request #28 from j-hagedorn/int-id-redo

Int id redo
j-hagedorn · Jan 16, 2018 · 1e6283a · 1e6283a
2 parents 41168fe + 61d60c6
commit 1e6283a
Show file tree

Hide file tree

Showing 4 changed files with 1,290 additions and 663 deletions.
diff --git a/global.R b/global.R
@@ -27,7 +27,6 @@
   due <- as.Date("2017/09/30")
 
 # Load de-identified data
-  # scrub_sis <- read.csv("data/scrub_sis.csv")
   scrub_sis <- read_feather("data/scrub_sis.feather")
 
 # Change sis_date from factor to date
@@ -36,6 +35,14 @@
 # Get most recent SIS score
   most_recent <- max(as.Date(scrub_sis$sis_date)[as.Date(scrub_sis$sis_date) <= Sys.Date()])
 
+# Define current interviewer as 
+  scrub_sis %<>%
+    group_by(interviewer) %>%
+    mutate(
+      current_int = max(sis_date) >= most_recent - 183 
+    ) %>%
+    ungroup()
+
 # Load totals
   totals <- read.csv("data/totals.csv")
 
@@ -45,12 +52,20 @@
 # Load service mapping table (created by "prep/sis_mappings.R" script)
   needs_matrix <- read.csv("data/needs_matrix.csv")
 
+# Transpose to allow joining by need item
+  need_to_hcpcs <- needs_matrix
+  rownames(need_to_hcpcs) <- need_to_hcpcs$Code
+  need_to_hcpcs %<>% 
+    select(-Code) %>% 
+    t() %>% 
+    as.data.frame()
+  need_to_hcpcs$item <- rownames(need_to_hcpcs)
+
 # Load HCPCS table (created by "prep/readServices.R" script)
-  codemap <- read.csv("data/codemap.csv")
+  codemap <- read.csv("data/codemap.csv") %>%
+    mutate(HCPCS = as.character(HCPCS))
 
 # Load transformed dfs to break down TOS
-  # q2 <- readRDS("data/q2.rds")
-  # q3 <- readRDS("data/q3.rds")
   q2 <- read_feather("data/q2.feather")
   q3 <- read_feather("data/q3.feather")
 
@@ -61,7 +76,9 @@
 
 
 ################################################################################ 
-# DEFINE FUNCTION: svs2sis (Service codes to SIS needs)
+# DEFINE FUNCTIONS: 
+
+# svs2sis (Service codes to SIS needs)
 
 # To get the SIS needs associated with a given HCPCS code
 # Assumes existence of needs_matrix df to map needs to svs
@@ -84,3 +101,73 @@
   # Example: 
   # res_svs <- svs2sis(c("T1020","H2016"))  
 
+## to_network()
+## Function to convert df to network data format ####
+
+  to_network <- function(df){
+
+    ntwk <- list()
+
+    ntwk$vals <-
+      df %>%
+      select(from,to,val = est_hrs_per_mo)
+
+    ntwk$nodes <-
+      unique(c(unique(as.character(df$from)),
+               unique(as.character(df$to)))) %>%
+      data.frame("name_id" = .) %>%
+      # Alphabetize
+      arrange(name_id) %>%
+      # Get values to size nodes
+      left_join(ntwk$vals, by = c("name_id" = "from")) %>%
+      left_join(ntwk$vals, by = c("name_id" = "to")) %>%
+      mutate(
+        val = ifelse(is.na(val.x) == T,val.y,val.x)
+      ) %>%
+      group_by(name_id) %>%
+      summarize(value = sum(val)) %>%
+      # Assign ids starting at 0
+      mutate(
+        label = name_id,
+        id = row_number(name_id)-1,
+        group = ifelse(name_id %in% df$from,"Needs","Services"),
+        title = ifelse(
+          group == "Services",
+          yes = paste0(
+            "Approximately ", round(value, digits = 1),
+            "<br>hours of ", tolower(name_id), 
+            "<br>could help me with various needs each month."
+          ),
+          no = paste0(
+            "I could benefit from approximately ", round(value, digits = 1),
+            "<br>hours of support for ", tolower(name_id), " each month."
+          )
+        )
+      )
+
+    ntwk$edges <-
+      df %>%
+      ungroup() %>%
+      left_join(ntwk$nodes, by = c("from" = "name_id")) %>%
+      rename(
+        name_from = from,
+        from = id
+      ) %>%
+      left_join(ntwk$nodes, by = c("to" = "name_id")) %>%
+      rename(
+        name_to = to,
+        to = id,
+        value = est_hrs_per_mo) %>%
+      mutate(
+        title = paste0(
+          "Approximately ", round(value, digits = 1),
+          "<br>hours of ", name_to, 
+          "<br>could be used to support me with ", name_from,
+          "<br>each month."
+        )
+      ) %>%
+      droplevels()
+
+    return(ntwk)
+
+  }
diff --git a/prep/readSIS.R b/prep/readSIS.R
@@ -81,16 +81,16 @@
       interviewer_orig = str_trim(tolower(assignedLoginId)),
       # Multiple fields exist to indicate the interviewer, each with different 
       # naming conventions and each with NA values in certain instances.
+      # The 'sis_int_id' is the primary field, but is usually an integer.
       # Since the 'Interviewer_login_id' is consistently used since 10/1/2016,
-      # we use that when available.  If that is absent, we use the person who
-      # created the assessment ('assignedLoginId')
-      interviewer = ifelse(is.na(Interviewer_login_id),
-                           yes = str_trim(tolower(assignedLoginId)),
-                           no = str_trim(tolower(Interviewer_login_id))),
-      # As a failsafe, we map any remaining NA values (currently zero) to 
-      # the last interviewer ID to modify the record
+      # we use that when available instead of the integer.  
+      interviewer = ifelse(is.na(as.integer(sis_int_id)) == F,
+                           yes = str_trim(tolower(Interviewer_login_id)),
+                           no = str_trim(tolower(sis_int_id))),
+      # If sis_int_id is NULL so is Interviewer_login_id, so we map any remaining 
+      # NA values to the interviewer e-mail field
       interviewer = ifelse(is.na(interviewer),
-                           yes = str_trim(tolower(lastModifiedByLoginId)),
+                           yes = str_trim(tolower(sis_int_email)),
                            no = interviewer)
     ) %>%
     mutate(