private evaporative cooling pEc

Dataset Inputs

Dataset Attributes


                

                
Creates a dataset with subjects in rows and variables in columns. Case-control status in last column named 'class'.
Choose a dataset with subjects in rows and variables in columns. Column labels should be in the first row and use R compatible names. Rows should be tab-delimted. All columns except class label are coreced into real numbers. Subject class labels should be in the last column. Class labels are coerced into factors.

pEC Algorithm Parameters

Evaporative Cooling parameters. Updating any one of these variables causes the Shiny app to update.

pEC Input Data

pEC Returned Accuracy

pEC Returned Number Correctly Detected

pEC Returned algo.acc

pEC Returned ggplot.data

show with app
library(shiny)
library(privateEC)
library(ggplot2)

shinyServer(function(input, output) {

  getDataset <- reactive({
    datasetsReady <- FALSE
    if (input$datasetsSrc == 1) {
      datasetsList <- createSimulation(num.samples = input$numSamples,
                                       num.variables = input$numVariables,
                                       pct.signals = input$pctSignals,
                                       bias = input$simBias,
                                       pct.train = input$pctTrain,
                                       pct.holdout = input$pctHoldout,
                                       pct.validation = input$pctValidation,
                                       verbose = FALSE)
      datasetsReady <- TRUE
    }
    if (input$datasetsSrc == 2) {
      if (is.null(input$datasetFile$datapath)) {
        return(NULL)
      }
      cat(input$datasetFile$datapath, "\n")
      fileDf <- read.table(input$datasetFile$datapath,
                           header = TRUE,
                           stringsAsFactors = FALSE)
      if (sum(which(input$txtClassLabel %in% colnames(fileDf))) != 1) {
        return(NULL)
      }
      datasetsList <- splitDataset(fileDf,
                                   class.label = input$txtClassLabel,
                                   pct.train = input$pctTrain,
                                   pct.holdout = input$pctHoldout,
                                   pct.validation = input$pctValidation)
      datasetsReady <- TRUE
    }
    list(split.data = datasetsList, split.success = datasetsReady)
  })

  runPec <- reactive({
    datasetsList <- getDataset()
    if (!datasetsList$split.success) {
      return(NULL)
    }
    pec.result <- NULL
    if (input$datasetsSrc == 1) {
      pec.result <- privateEC(train.ds = datasetsList$split.data$train,
                              holdout.ds = datasetsList$split.data$holdout,
                              validation.ds = datasetsList$split.data$validation,
                              label = input$txtClassLabel,
                              update.freq = input$updateFrequency,
                              rf.ntree = input$ntree,
                              rf.mtry = input$mtry,
                              start.temp = input$startTemp,
                              final.temp = input$finalTemp,
                              tau.param = input$ecTau,
                              corelearn.estimator = input$reliefEst,
                              is.simulated = TRUE,
                              signal.names = datasetsList$split.data$signal.names,
                              verbose = FALSE)
    } else {
      pec.result <- privateEC(train.ds = datasetsList$split.data$train,
                              holdout.ds = datasetsList$split.data$holdout,
                              validation.ds = datasetsList$split.data$validation,
                              label = input$txtClassLabel,
                              update.freq = input$updateFrequency,
                              rf.ntree = input$ntree,
                              rf.mtry = input$mtry,
                              start.temp = input$startTemp,
                              final.temp = input$finalTemp,
                              tau.param = input$ecTau,
                              corelearn.estimator = input$reliefEst,
                              is.simulated = FALSE,
                              verbose = FALSE)
    }
    pec.result
  })

  catDataFrameStats <- function(thisDF, titleText) {
    cat(titleText, "\n")
    cat("Rows:", nrow(thisDF), "Cols:", ncol(thisDF), "\n")
    cat("Subjects:", nrow(thisDF), "\n")
    num.cases <- sum(thisDF[, ncol(thisDF)] == 1)
    num.ctrls <- sum(thisDF[, ncol(thisDF)] != 1)
    cat("Cases:", num.cases, "\n")
    cat("Ctrls:", num.ctrls, "\n")
  }

  output$messagesOut <- renderPrint({
    datasetsList <- getDataset()
    if (!datasetsList$split.success) {
      return(NULL)
    }
    cat("Read data set dimensions:\n")
    catDataFrameStats(datasetsList$split.data$train, "Train")
    catDataFrameStats(datasetsList$split.data$holdout, "Holdout")
    catDataFrameStats(datasetsList$split.data$validation, "Validation")
  })

  output$runStdOut <- renderPrint({
    datasetsList <- getDataset()
    if (!datasetsList$split.success) {
      return(NULL)
    }
    pec.result <- runPec()
    cat("pEC elapsed time: ", pec.result$elapsed, "\n")
  })

  output$distPlot <- renderPlot({
    datasetsList <- getDataset()
    if (!datasetsList$split.success) {
      return(NULL)
    }
    numCols <- ncol(datasetsList$split.data$train)
    train.data <- datasetsList$split.data$train[, -numCols]
    holdout.data <- datasetsList$split.data$holdout[, -numCols]
    validation.data <- datasetsList$split.data$validation[, -numCols]
    if (input$pctValidation > 0) {
      par(mfrow = c(1, 3))
    } else {
      par(mfrow = c(1, 2))
    }
    hist(as.numeric(as.matrix(train.data)), main = "Train", xlab = "Value")
    hist(as.numeric(as.matrix(holdout.data)), main = "Holdout", xlab = "Value")
    if (input$pctValidation > 0) {
      hist(as.numeric(as.matrix(validation.data)), main = "Validation", xlab = "Value")
    }
  })

  output$ecResultsTable <- renderTable({
    datasetsList <- getDataset()
    if (!datasetsList$split.success) {
      return(NULL)
    }
    pec.result <- runPec()
    data.frame(vars.remain = as.integer(pec.result$algo.acc$vars.remain),
               train.acc = pec.result$algo.acc$train.acc,
               holdout.acc = pec.result$algo.acc$holdout.acc,
               validation.acc = pec.result$algo.acc$validation.acc)
  })

  output$ecCorrectTable <- renderTable({
    datasetsList <- getDataset()
    if (!datasetsList$split.success) {
      return(NULL)
    }
    pec.result <- runPec()
    if (input$datasetsSrc == 1) {
      data.frame(Remain = pec.result$algo.acc[, 1],
                 Correct = pec.result$correct)
    } else {
      data.frame(notsim = "not a simulation")
    }
  }, digits = 0)

  output$ecResultsPlot <- renderPlot({
    datasetsList <- getDataset()
    if (!datasetsList$split.success) {
      return(NULL)
    }
    pec.result <- runPec()
    plot(pec.result$algo.acc$vars.remain,
         pec.result$algo.acc$holdout.acc,
         col = "red", pch = 16, type = 'b', cex = 0.75,
         main = "One run of privateEC",
         ylim = c(0.05, 1.0),
         xlab = "Number of Attributes in Model",
         ylab = "Accuracy")
    points(pec.result$algo.acc$vars.remain,
           pec.result$algo.acc$train.acc,
           col = "green", pch = 1, type = 'b', cex = 0.75)
    points(pec.result$algo.acc$vars.remain,
           pec.result$algo.acc$validation.acc,
           col = "blue", pch = 4, type = 'b', cex = 0.75)
    legend("topright", c("Train", "Holdout", "Test"),
           pch = c(16, 1, 4), col = c("red", "green", "blue"), cex = 0.75)
  })

  output$ggplotPlot <- renderPlot({
    datasetsList <- getDataset()
    if (!datasetsList$split.success) {
      return(NULL)
    }
    pec.result <- runPec()
    p <- ggplot(pec.result$ggplot.data, aes(x = vars.remain,
                                            y = value,
                                            colour = variable)) +
      geom_point() + geom_line() + ggtitle("One run of privateEC") +
      xlab("Number of Attributes in Model") + ylab("Accuracy")
    print(p)
  })
})
# private-ec-app:
#
# Bill White - 8/1/17

library(shiny)

# Choices for the 'estimator'	parameter of attrEval() in the CORElearn package.
# The list is from the CORElearn command: infoCore(what="attrEval").
attr.eval.algs <- read.table("corelearn_attr_eval.tab", sep = "\t", header = TRUE,
                             stringsAsFactors = FALSE)

shinyUI(fluidPage(

  titlePanel("private evaporative cooling pEc"),

  sidebarLayout(
    sidebarPanel(
      h3("Dataset Inputs"),
      radioButtons("datasetsSrc",
                   "Data Sets",
                   c("Simulate datasets" = 1,
                     "Upload dataset to split" = 2),
                   selected = 1),
      h3("Dataset Attributes"),
      verbatimTextOutput("messagesOut", placeholder = TRUE),
      verbatimTextOutput("runStdOut", placeholder = TRUE),
      conditionalPanel(condition = "input.datasetsSrc == 1",
                       helpText("Creates a dataset with subjects in rows and variables in columns.",
                                "Case-control status in last column named 'class'."),
                       numericInput("numSamples", "Number of samples:",
                                    value = 100),
                       numericInput("numVariables", "Number of variables:",
                                    value = 100),
                       numericInput("pctSignals", "Proportion functional variables:",
                                    value = 0.1),
                       numericInput("pctTrain", "Percent training:",
                                    value = 0.34),
                       numericInput("pctHoldout", "Percent holdout:",
                                    value = 0.33),
                       numericInput("pctValidation", "Percent validation:",
                                    value = 0.33)
      ),
      conditionalPanel(condition = "input.datasetsSrc == 2",
                       helpText("Choose a dataset with subjects in rows and variables in columns.",
                                "Column labels should be in the first row and use R compatible names.",
                                "Rows should be tab-delimted.",
                                "All columns except class label are coreced into real numbers.",
                                "Subject class labels should be in the last column.",
                                "Class labels are coerced into factors."),
                       fileInput("datasetFile",
                                 label = "Select dataset: subject by variable"),
                       textInput("txtClassLabel", "Class label:", value = "class")
      ),
      conditionalPanel(condition = "datasetsReady",
                       h3("pEC Algorithm Parameters"),
                       helpText("Evaporative Cooling parameters. Updating any one
                                of these variables causes the Shiny app to update."),
                       numericInput("ecTau", "Tau:", value = 100),
                       numericInput("updateFrequency", "Temperature update frequency:", value = 5),
                       numericInput("startTemp", "Starting temperature:",
                                    value = 0.1),
                       numericInput("finalTemp", "Final temperature:",
                                    value = 0.00001),
                       numericInput("threshold", "Threshold:",
                                    value = 0.4),
                       numericInput("tolerance", "Tolerance:",
                                    value = 0.1),
                       numericInput("simBias", "Simulation bias:",
                                    value = 0.4),
                       selectInput("reliefEst",
                                   label = "Relief-F attribute evaluator estimator:",
                                   choices = attr.eval.algs,
                                   selected = "ReliefFequalK"),
                       numericInput("ntree", "RandomForest mtree:",
                                    value = 100),
                       numericInput("mtry", "RandomForest mtry:",
                                    value = 10)
      )
    ),

    mainPanel(
      h3("pEC Input Data"),
      plotOutput("distPlot"),
      h3("pEC Returned Accuracy"),
      tableOutput("ecResultsTable"),
      h3("pEC Returned Number Correctly Detected"),
      tableOutput("ecCorrectTable"),
      h3("pEC Returned algo.acc"),
      plotOutput("ecResultsPlot"),
      h3("pEC Returned ggplot.data"),
      plotOutput("ggplotPlot")
    )
  )
))
Code license: GPL-3