2 Sample Tests

Linear Regression

show with app
# Name: Power.R
# Description: App to visualize Power under certain violations of assumptions
#
# Input:  --
# Output: --
#
# Created:  27-30/Mrz/2019, Julian, Chiara, Nino, Sahra

##########################################
# working directory
# setwd("C:/Users/Julian/Dropbox/Psychologie B.Sc/TquanT Seminar/Projekt_Power/merge")
# packages
library("shiny")
library("sn")
library("parallel")
library("lattice")
library("shinycssloaders")
library("shinydashboard")

# load custom functions

#### power_calculation and sampling for skewed normal
power_skewed_normal <- function(n, size_ratio_norm, scale1, skew1, effect_size_norm, scale2, skew2){
  
  #library("parallel")
  iter_sim <- 1000  ## vllt spter anpassen
  
  #n <- c(20,120)
  #effect_size_norm <- 1
  #size_ratio_norm  <- 1
  #scale1 <- 3
  #skew1 <- 0
  #scale2 <- 3
  #skew2 <- 0
  
  #Parallelisieren in R auf mehreren Clustern 
  cores <- detectCores() - 1
  #Cluster erstellen
  clu <- makeCluster(cores)
  #Pakete in jedes Cluster laden
  clusterEvalQ(clu,{
    library("sn")
    sim_data <- function(n, size_ratio_norm, scale1, skew1, effect_size_norm, scale2, skew2){
      s1 <- rsn(n * size_ratio_norm, 0,
                scale1, skew1) # location, scale, skew
      s2 <- rsn(n, effect_size_norm, scale2, skew2)
      return(list(s1, s2))
    }
  })
  
  #Daten in jedes Cluster laden
  clusterExport(clu,c("n", "iter_sim", "size_ratio_norm", "scale1", "skew1", "effect_size_norm", "scale2", "skew2"), envir=environment())
  #Power t, welch, wilcox
  power_t <- c()
  power_welch <- c()
  power_wilcox <- c()
  for(j in round(seq(n[1], n[2], length.out = 10))){
    clusterExport(clu, c("j"), envir=environment())
    ps <- parSapply(clu, 1:iter_sim, function(i,...){
      .t <- sim_data(j, size_ratio_norm, scale1, skew1, effect_size_norm, scale2, skew2)
      list(
        t.test(.t[[1]], .t[[2]], var.equal = T)$p.value,
        t.test(.t[[1]], .t[[2]], var.equal = F)$p.value,
        wilcox.test(.t[[1]], .t[[2]])$p.value
      )          
    })
    .p <- apply(ps, 1, function(x){mean(x < .05)})
    power_t      <- c(power_t, .p[1])
    power_welch  <- c(power_welch, .p[2])
    power_wilcox <- c(power_wilcox, .p[3])
  }
  
  #Cluster stoppen!!!
  stopCluster(clu)
  
  
  
  power_p <- data.frame(power = c(power_t, power_welch, power_wilcox),
                        test = rep(c("t", "welch", "wilcox"), each = length(round(seq(n[1], n[2], length.out = 10)))),
                        n =  round(seq(n[1], n[2], length.out = 10)))
  
  
  return(power_p)
  
}


#### power_calculation and sampling for chisq
power_chisq <- function(n, size_ratio_chisq, df1, ncp1, df2, ncp2){
  
  iter_sim <- 1000
  #Parallelisieren in R auf mehreren Clustern 
  cores <- detectCores() - 1
  #Cluster erstellen
  clu <- makeCluster(cores)
  #Pakete in jedes Cluster laden
  clusterEvalQ(clu,{
    sim_data <- function(n,  size_ratio_chisq, df1, ncp1, df2, ncp2){
      s1 <- rchisq(n * size_ratio_chisq, df1, ncp1) # df, non centrality paramter
      s2 <- rchisq(n, df2, ncp2)
      return(list(s1, s2))
    }
  })
  
  #Daten in jedes Cluster laden
  clusterExport(clu,c("n", "iter_sim", "size_ratio_chisq", "df1", "ncp1", "df2", "ncp2"), envir=environment())
  #Power t, welch, wilcox
  power_t <- c()
  power_welch <- c()
  power_wilcox <- c()
  for(j in round(seq(n[1], n[2], length.out = 10))){
    clusterExport(clu, c("j"), envir=environment())
    ps <- parSapply(clu, 1:iter_sim, function(i,...){
      .t <- sim_data(j, size_ratio_chisq, df1, ncp1, df2, ncp2)
      list(
        t.test(.t[[1]], .t[[2]], var.equal = T)$p.value,
        t.test(.t[[1]], .t[[2]], var.equal = F)$p.value,
        wilcox.test(.t[[1]], .t[[2]])$p.value
      )          
    })
    .p <- apply(ps, 1, function(x){mean(x < .05)})
    power_t <-    c(power_t, .p[1])
    power_welch <- c(power_welch, .p[2])
    power_wilcox <- c(power_wilcox, .p[3])
  }
  
  #Cluster stoppen!!!
  stopCluster(clu)
  
  
  
  power_p <- data.frame(power = c(power_t, power_welch, power_wilcox),
                        test = rep(c("t", "welch", "wilcox"), each = length(round(seq(n[1], n[2], length.out = 10)))),
                        n = round(seq(n[1], n[2], length.out = 10)))
  
  
  return(power_p)
}


#########################
#########################
#########################

ui <- dashboardPage(title = "Power and Violation of Assumptions",
                    skin = "black",
                    dashboardHeader(title = "Power and Violation of Assumptions",
                                    titleWidth = 450),
                    dashboardSidebar(
                      sidebarMenu(
                        menuItem("2 Sample Tests", tabName = "first_app"),
                        menuItem("Linear Regression", tabName = "second_app")
                      )
                    ),
                    dashboardBody(
                      tabItems(
                        tabItem(tabName = "first_app",
    includeScript("../../../Matomo-tquant.js"),
                                h4("2 Sample Tests"),
                                ###### first app
                                fluidRow(
                                  box(
                                    selectInput("distro_choosen", "Change Distribution to sample from", 
                                                c("Skewed Normal", "Chi-Squared") , 
                                                selected = "Skewed Normal"),
                                    uiOutput("distro_setting") 
                                    ###### uiOutput("power_setting"),
                                    
                                    
                                  ),
                                  tabBox(id = "tabs_active", type = "tabs",
                                         tabPanel("Distributions", 
                                                  plotOutput("distro_plot")),
                                         tabPanel("Power",
                                                  actionButton("Update", "Calculate Power"),
                                                  withSpinner(plotOutput("power_plot"))
                                                  )
                                         
                                  )
                                  
                                  
                                )
                                
                                
                                
                                ######
                        ),
                        tabItem(tabName = "second_app",
                                h4("Linear Regression"),
                                ###### second appp
                                fluidRow(
                                  box(selectInput("type", "Choose your regression type", c("Quadratic","Cubic"), multiple = FALSE),
                                      uiOutput("parameters")
                                  ),
                                  
                                  tabBox(type = "tabs",
                                         tabPanel("Distribution", plotOutput("dis")),
                                         tabPanel("Power", withSpinner(plotOutput("pow1")), textOutput("pow2")),
                                         tabPanel("Violations", plotOutput("vio"))))
                        )
                      )
                    )
)

server <- function(input,output,server){
  ###### first app
  
  sim_power <- eventReactive(input$Update,{ #simulate data
    if(input$distro_choosen == "Skewed Normal"){
      sim <- power_skewed_normal(n = input$n, input$size_ratio_norm, 
                                 input$scale1, input$skew1, input$effect_size_norm, 
                                 input$scale2, input$skew2)
      
    }else if(input$distro_choosen == "Chi-Squared"){
      sim <- power_chisq(input$n, input$size_ratio_chisq, input$df1, input$ncp1, input$df2, input$ncp2)
    }
    return(sim)
  })
  
  output$power_plot <- renderPlot({

      
      #xyplot(power ~ jitter(n, 0.01), data = sim_power(), groups = test, 
      #       type = c("p"), auto.key = T, xlab = "Samplesize N1", ylab = "Power")
      plot(power ~ jitter(n, 0.03), data = sim_power()[sim_power()$test == "t", ],
           col = "red", xlab = "Samplesize N1", pch = 16, ylim = c(0, 1), 
           ylab = "Power")
      points(power ~ jitter(n, 0.03), data = sim_power()[sim_power()$test == "welch", ],
           col = "blue",  pch = 16)  
      points(power ~ jitter(n, 0.03), data = sim_power()[sim_power()$test == "wilcox", ],
           col = "green",  pch = 16)
      

      isolate({
        if(input$distro_choosen == "Skewed Normal" && input$size_ratio_norm == 1){
            var_theo1 <- input$scale1^2 * (1 - (2 * (input$skew1/sqrt(1 + input$skew1^2))^2)/pi)
            var_theo2 <- input$scale2^2 * (1 - (2 * (input$skew2/sqrt(1 + input$skew2^2))^2)/pi)
            sd_pooled <- sqrt((((input$n * input$size_ratio_norm -1)* var_theo1) + ((input$n -1) * var_theo2))/(input$n + ((input$n * input$size_ratio_norm) - 2)))

            p_theo <- power.t.test(n = input$n[1]:input$n[2], 
                             delta = abs(input$scale1 * (input$skew1/sqrt(1 + input$skew1^2)) - (input$effect_size_norm + input$scale2 * (input$skew2/sqrt(1 + input$skew2^2)))), 
                               sd = sd_pooled, .05)$power
            points(p_theo ~ I(input$n[1]:input$n[2]), type = "l")    
      }
      if(input$distro_choosen == "Chi-Squared" && input$size_ratio_chisq == 1){
        var_theo1 <- 2 * (input$df1 + 2 * input$ncp1)
        var_theo2 <- 2 * (input$df2 + 2 * input$ncp2)
        sd_pooled <- sqrt((((input$n * input$size_ratio_chisq -1)* var_theo1) + ((input$n -1) * var_theo2))/(input$n + ((input$n * input$size_ratio_chisq) - 2)))

        p_theo <- power.t.test(n = input$n[1]:input$n[2], 
                             delta = abs((input$df1 + input$ncp1) - 
        (input$df2 + input$ncp2)), 
                               sd = sd_pooled, .05)$power
       points(p_theo ~ I(input$n[1]:input$n[2]), type = "l")
      }
    })
      
     legend("bottomright", legend = c("T-test; Normal distributed", "T-test", "Welch-test", "Wilcox-test"), lty = 1,
             col = c("black", "red", "blue", "green"), bty = "n") 
    
  })

 output$power_setting <- renderUI({
     if(input$distro_choosen == "Chi-Squared" & input$tabs_active == "Power"){
         tagList(
         sliderInput("n", "Sample size Range", min = 2, max = 600, value = c(2, 150)),
         sliderInput("size_ratio_chisq", "Ratio of Sample Size (N1/N2)", min =1, max = 10, value = 1))
     }
    else if(input$distro_choosen == "Skewed Normal" & input$tabs_active == "Power"){
        tagList(
        sliderInput("n", "Sample size Range", min = 2, max = 600, value = c(2, 150)),
        sliderInput("size_ratio_norm", "Ratio of Sample Size (N1/N2)", min =1, max = 10, value = 1)
        )
    }

 }) 
 output$diff_means <- reactive({
     if(input$distro_choosen == "Skewed Normal"){
                     round(abs(input$scale1 * (input$skew1/sqrt(1 + input$skew1^2)) - (input$effect_size_norm + input$scale2 * (input$skew2/sqrt(1 + input$skew2^2)))), 3) 
     }
     else if(input$distro_choosen == "Chi-Squared"){
        round(abs((input$df1 + input$ncp1) - 
        (input$df2 + input$ncp2)))
     }
 })
  
  output$distro_setting <- renderUI({  #adjust UI according to distro choosen
    if(input$distro_choosen == "Skewed Normal" ){
      #s1 <- rsnorm(n1, 0, input$scale1, input$skew1) # location, scale, skew
      #s2 <- rsnorm(n2, input$effect_size, input$scale2, input$skew2)
      tagList(
        uiOutput("power_setting"),
        sliderInput("effect_size_norm", "Difference in Location",      min =-20, max = 20, value = 2),
        tags$b("True Difference in means:"),
        textOutput("diff_means"),   
     
        
        column(6,
               h4("Sample 1", aligne = "center"),
               sliderInput("scale1", "Scale",   min =0.01, max = 20, value = 10),
               sliderInput("skew1", "Skewness", min =-10, max = 10, value = 0)
        ),
        column(6,
               h4("Sample 2", aligne = "center"),
               sliderInput("scale2", "Scale",   min =0.01, max = 20, value = 10),
               sliderInput("skew2", "Skewness", min =-10, max = 10, value = 0)
        )
        
      )
    }else if(input$distro_choosen == "Chi-Squared"){
      #s1 <- rexp(n1,  input$rate1) # rate
      #s2 <- rexp(n2,  input$rate2)
      #                 input$df1, input$ncp1) # df, non centrality paramter
      # s2 <- rchisq(n, input$df2, input$ncp2)
      tagList(
        uiOutput("power_setting"),      
        tags$b("True Difference in means:"),
        textOutput("diff_means"),
        column(6,
               h4("Sample 1", aligne = "center"),
               sliderInput("df1", "Degrees of freedom",        min =0, max = 20, value = 5),
               sliderInput("ncp1", "Non-Centrality parameter", min =0, max = 10, value = 0)
        ),
        column(6,
               h4("Sample 2", aligne = "center"),
               sliderInput("df2", "Degrees of freedom",        min =0, max = 20, value = 5),
               sliderInput("ncp2", "Non-Centrality parameter", min =0, max = 10, value = 0)
        )
      )
    }
    
  })
  
  output$distro_plot <- renderPlot({
    #if(min(input$scale1, input$scale2) == 0){
    #    plot(c(1, 1), type = "n", axes = F, xlab = "", ylab = "")
    #    text(1, 1, "Scale cannot be zero")
    #}else 
    if(input$distro_choosen == "Skewed Normal"){
      if(max(dsn(seq(-50, 50, by = 1), 0, input$scale1, input$skew1)) >= 
         max(dsn(seq(-50, 50, by = 1), 0, input$scale2, input$skew2))){
        curve(dsn(x, 0, input$scale1, input$skew1), 
              from = (-.5 * max(input$scale1, input$scale2) * max(input$effect_size_norm, 10)), 
              to = (.7 * max(input$scale1, input$scale2) * max(input$effect_size_norm, 10)), col = "red", ylab = "Density") # location, scale, skew
        curve(dsn(x, input$effect_size_norm, input$scale2, input$skew2), add = T)
        abline(v = input$scale1 * (input$skew1/sqrt(1 + input$skew1^2)) * sqrt(2/pi), col = "red", lty = 2)
        abline(v = input$effect_size_norm + input$scale2 * (input$skew2/sqrt(1 + input$skew2^2)) * sqrt(2/pi), col = "black", lty = 2)
      }else {
        curve(dsn(x, input$effect_size_norm, input$scale2, input$skew2), 
              from = -.5 * max(input$scale1, input$scale2) * max(input$effect_size_norm, 10), 
              to = .7 * max(input$scale1, input$scale2) * max(input$effect_size_norm, 10), ylab = "Density") # location, scale, skew
        curve(dsn(x, 0, input$scale1, input$skew1), add = T, col = "red")
        abline(v = input$scale1 * (input$skew1/sqrt(1 + input$skew1^2)) * sqrt(2/pi), col = "red", lty = 2)
        abline(v = input$effect_size_norm + input$scale2 * (input$skew2/sqrt(1 + input$skew2^2)) * sqrt(2/pi), col = "black", lty = 2)
      }
      
    }else if(input$distro_choosen == "Chi-Squared"){
      curve(dchisq(x, input$df1, input$ncp1), 
            from = 0, to = 2 * max(input$df1, input$df2), col = "red", ylab = "Density") # df, non centrality paramter
      curve(dchisq(x, input$df2, input$ncp2), add = T)
      abline(v = input$df1 + input$ncp1, col = "red")
      abline(v = input$df2 + input$ncp2)
    }
    legend("topright", legend = c("Sample 1", "Sample 2"), lty = c(1, 1), col = c("red", "black"),
           bty = "n")
    
  })
  
  
  
  ######second app
  output$parameters <- renderUI({
    if (input$type == "Quadratic"){
      tagList(
        sliderInput("nlr", "Sample size", min=5, max=300, value = c(10, 100), step=20, round= TRUE),
        sliderInput("b0", "Value beta0", min=-30, max=30, value = 1, step=1, round=TRUE),
        sliderInput("b1", "Value beta1", min=-30, max=30, value = 1, step=1, round=TRUE),
        sliderInput("b2", "Value beta2", min=-30, max=30, value = 1, step=1, round=TRUE),
        sliderInput("s", "Value standard deviation of the error term", min=.001, max=20, value = 5, step=0.1, round=TRUE),
        sliderInput("alpha", "Value alpha", min=0, max=0.1, value = 0.05, step=.01, round=TRUE),
        sliderInput("reps", "Number of replications", min=0, max=1000, value = 100, step=20, round=TRUE)
      )}
    else{
      tagList(
        sliderInput("n1", "Sample size", min=5, max=300, value =c(10, 100), step=20, round= TRUE),
        sliderInput("b01", "Value beta0", min=-30, max=30, value = 1, step=1, round=TRUE),
        sliderInput("b11", "Value beta1", min=-30, max=30, value = 1, step=1, round=TRUE),
        sliderInput("b21", "Value beta2", min=-30, max=30, value = 1, step=1, round=TRUE),
        sliderInput("b31", "Value beta3", min=-30, max=30, value = 1, step=1, round=TRUE),
        sliderInput("s1", "Value standard deviation of the error term", min=.001, max=20, value = 5, step=0.1, round=TRUE),
        sliderInput("alpha1", "Value alpha", min=0, max=0.1, value = 0.05, step=.01, round=TRUE),
        sliderInput("reps1", "Number of replications", min=0, max=1000, value = 100, step=20, round=TRUE)
      )} 
  })
  
  output$dis <- renderPlot({
    if (input$type == "Quadratic"){
      x <- seq(from = -1, to = 1, length.out=input$nlr[2] - input$nlr[1] +1)
      y <- input$b0 + input$b1*x + input$b2*(x^2)
      plot(x,y, type="l")
      points(x,y+rnorm(input$nlr[2] - input$nlr[1] +1,mean=0, sd=input$s))}
    else{
      x <- seq(from = -1, to = 1, length.out =input$n1[2] - input$n1[1] +1)
      y <- input$b01 + input$b11*x + input$b21*(x^2) + input$b31*(x^3)
      plot(x,y, type="l")
      points(x,y+rnorm(input$n1[2] - input$n1[1] +1,mean=0, sd=input$s1))}
    abline(lm(y ~ x), col = "red")
  })
  
  output$pow1 <- renderPlot({
    if (input$type == "Quadratic"){
      pow <- c()
      for(i in input$nlr[1]:input$nlr[2]){
        pval <- replicate(input$reps,{
          x <- seq(from = -1, to = 1, length.out = i)
          y <- input$b0 + input$b1*x + input$b2*(x^2) + rnorm(i,mean=0, sd=input$s)
          unlist(summary(lm(y ~ x)))$coefficients8
        })
        pow <- c(pow, mean(pval < input$alpha))
      }
      plot(input$nlr[1]:input$nlr[2],pow, xlab = "Sample size", ylab = "Power")}
    else{
      pow <- c()
      for(j in input$n1[1]:input$n1[2]){
        pval <- replicate(input$reps1,{
          x <- seq(from = -1, to = 1, length.out = j)
          y <- input$b01 + input$b11*x + input$b21*(x^2) + input$b31*(x^3) + rnorm(j,mean=0, sd=input$s1)
          unlist(summary(lm(y ~ x)))$coefficients8
        })
        pow <- c(pow, mean(pval < input$alpha1))}
      plot(input$n1[1]:input$n1[2],pow, xlab = "Sample size", ylab = "Power") 
    }
  })
  
  output$vio <- renderPlot({
    if (input$type == "Quadratic"){
      x <- seq(from = -1, to = 1, length.out=input$nlr[2] - input$nlr[1] + 1)
      y <- input$b0 + input$b1*x + input$b2*(x^2) + rnorm(input$nlr[2] - input$nlr[1] + 1, mean=0, sd=input$s)
      regression <- lm(y~x)
      par(mfrow = c(2, 2))
      plot(regression)}
    else{
      x <- seq(from = -1, to = 1, length.out=input$n1)
      y <- input$b01 + input$b11*x + input$b21*(x^2) + input$b31*(x^3) + rnorm(input$n1,mean=0, sd=input$s1)
      regression1 <- lm(y~x)
      par(mfrow = c(2, 2))
      plot(regression1)}
  })
  
  
}

shinyApp(ui,server)