Power and Violation of Assumptions

2 Sample Tests

Change Distribution to sample from

Distributions
Power

Linear Regression

Choose your regression type

# Name: Power.R # Description: App to visualize Power under certain violations of assumptions # # Input: -- # Output: -- # # Created: 27-30/Mrz/2019, Julian, Chiara, Nino, Sahra ########################################## # working directory # setwd("C:/Users/Julian/Dropbox/Psychologie B.Sc/TquanT Seminar/Projekt_Power/merge") # packages library("shiny") library("sn") library("parallel") library("lattice") library("shinycssloaders") library("shinydashboard") # load custom functions #### power_calculation and sampling for skewed normal power_skewed_normal <- function(n, size_ratio_norm, scale1, skew1, effect_size_norm, scale2, skew2){ #library("parallel") iter_sim <- 1000 ## vllt spter anpassen #n <- c(20,120) #effect_size_norm <- 1 #size_ratio_norm <- 1 #scale1 <- 3 #skew1 <- 0 #scale2 <- 3 #skew2 <- 0 #Parallelisieren in R auf mehreren Clustern cores <- detectCores() - 1 #Cluster erstellen clu <- makeCluster(cores) #Pakete in jedes Cluster laden clusterEvalQ(clu,{ library("sn") sim_data <- function(n, size_ratio_norm, scale1, skew1, effect_size_norm, scale2, skew2){ s1 <- rsn(n * size_ratio_norm, 0, scale1, skew1) # location, scale, skew s2 <- rsn(n, effect_size_norm, scale2, skew2) return(list(s1, s2)) } }) #Daten in jedes Cluster laden clusterExport(clu,c("n", "iter_sim", "size_ratio_norm", "scale1", "skew1", "effect_size_norm", "scale2", "skew2"), envir=environment()) #Power t, welch, wilcox power_t <- c() power_welch <- c() power_wilcox <- c() for(j in round(seq(n[1], n[2], length.out = 10))){ clusterExport(clu, c("j"), envir=environment()) ps <- parSapply(clu, 1:iter_sim, function(i,...){ .t <- sim_data(j, size_ratio_norm, scale1, skew1, effect_size_norm, scale2, skew2) list( t.test(.t[[1]], .t[[2]], var.equal = T)$p.value, t.test(.t[[1]], .t[[2]], var.equal = F)$p.value, wilcox.test(.t[[1]], .t[[2]])$p.value ) }) .p <- apply(ps, 1, function(x){mean(x < .05)}) power_t <- c(power_t, .p[1]) power_welch <- c(power_welch, .p[2]) power_wilcox <- c(power_wilcox, .p[3]) } #Cluster stoppen!!! stopCluster(clu) power_p <- data.frame(power = c(power_t, power_welch, power_wilcox), test = rep(c("t", "welch", "wilcox"), each = length(round(seq(n[1], n[2], length.out = 10)))), n = round(seq(n[1], n[2], length.out = 10))) return(power_p) } #### power_calculation and sampling for chisq power_chisq <- function(n, size_ratio_chisq, df1, ncp1, df2, ncp2){ iter_sim <- 1000 #Parallelisieren in R auf mehreren Clustern cores <- detectCores() - 1 #Cluster erstellen clu <- makeCluster(cores) #Pakete in jedes Cluster laden clusterEvalQ(clu,{ sim_data <- function(n, size_ratio_chisq, df1, ncp1, df2, ncp2){ s1 <- rchisq(n * size_ratio_chisq, df1, ncp1) # df, non centrality paramter s2 <- rchisq(n, df2, ncp2) return(list(s1, s2)) } }) #Daten in jedes Cluster laden clusterExport(clu,c("n", "iter_sim", "size_ratio_chisq", "df1", "ncp1", "df2", "ncp2"), envir=environment()) #Power t, welch, wilcox power_t <- c() power_welch <- c() power_wilcox <- c() for(j in round(seq(n[1], n[2], length.out = 10))){ clusterExport(clu, c("j"), envir=environment()) ps <- parSapply(clu, 1:iter_sim, function(i,...){ .t <- sim_data(j, size_ratio_chisq, df1, ncp1, df2, ncp2) list( t.test(.t[[1]], .t[[2]], var.equal = T)$p.value, t.test(.t[[1]], .t[[2]], var.equal = F)$p.value, wilcox.test(.t[[1]], .t[[2]])$p.value ) }) .p <- apply(ps, 1, function(x){mean(x < .05)}) power_t <- c(power_t, .p[1]) power_welch <- c(power_welch, .p[2]) power_wilcox <- c(power_wilcox, .p[3]) } #Cluster stoppen!!! stopCluster(clu) power_p <- data.frame(power = c(power_t, power_welch, power_wilcox), test = rep(c("t", "welch", "wilcox"), each = length(round(seq(n[1], n[2], length.out = 10)))), n = round(seq(n[1], n[2], length.out = 10))) return(power_p) } ######################### ######################### ######################### ui <- dashboardPage(title = "Power and Violation of Assumptions", skin = "black", dashboardHeader(title = "Power and Violation of Assumptions", titleWidth = 450), dashboardSidebar( sidebarMenu( menuItem("2 Sample Tests", tabName = "first_app"), menuItem("Linear Regression", tabName = "second_app") ) ), dashboardBody( tabItems( tabItem(tabName = "first_app", includeScript("../../../Matomo-tquant.js"), h4("2 Sample Tests"), ###### first app fluidRow( box( selectInput("distro_choosen", "Change Distribution to sample from", c("Skewed Normal", "Chi-Squared") , selected = "Skewed Normal"), uiOutput("distro_setting") ###### uiOutput("power_setting"), ), tabBox(id = "tabs_active", type = "tabs", tabPanel("Distributions", plotOutput("distro_plot")), tabPanel("Power", actionButton("Update", "Calculate Power"), withSpinner(plotOutput("power_plot")) ) ) ) ###### ), tabItem(tabName = "second_app", h4("Linear Regression"), ###### second appp fluidRow( box(selectInput("type", "Choose your regression type", c("Quadratic","Cubic"), multiple = FALSE), uiOutput("parameters") ), tabBox(type = "tabs", tabPanel("Distribution", plotOutput("dis")), tabPanel("Power", withSpinner(plotOutput("pow1")), textOutput("pow2")), tabPanel("Violations", plotOutput("vio")))) ) ) ) ) server <- function(input,output,server){ ###### first app sim_power <- eventReactive(input$Update,{ #simulate data if(input$distro_choosen == "Skewed Normal"){ sim <- power_skewed_normal(n = input$n, input$size_ratio_norm, input$scale1, input$skew1, input$effect_size_norm, input$scale2, input$skew2) }else if(input$distro_choosen == "Chi-Squared"){ sim <- power_chisq(input$n, input$size_ratio_chisq, input$df1, input$ncp1, input$df2, input$ncp2) } return(sim) }) output$power_plot <- renderPlot({ #xyplot(power ~ jitter(n, 0.01), data = sim_power(), groups = test, # type = c("p"), auto.key = T, xlab = "Samplesize N1", ylab = "Power") plot(power ~ jitter(n, 0.03), data = sim_power()[sim_power()$test == "t", ], col = "red", xlab = "Samplesize N1", pch = 16, ylim = c(0, 1), ylab = "Power") points(power ~ jitter(n, 0.03), data = sim_power()[sim_power()$test == "welch", ], col = "blue", pch = 16) points(power ~ jitter(n, 0.03), data = sim_power()[sim_power()$test == "wilcox", ], col = "green", pch = 16) isolate({ if(input$distro_choosen == "Skewed Normal" && input$size_ratio_norm == 1){ var_theo1 <- input$scale1^2 * (1 - (2 * (input$skew1/sqrt(1 + input$skew1^2))^2)/pi) var_theo2 <- input$scale2^2 * (1 - (2 * (input$skew2/sqrt(1 + input$skew2^2))^2)/pi) sd_pooled <- sqrt((((input$n * input$size_ratio_norm -1)* var_theo1) + ((input$n -1) * var_theo2))/(input$n + ((input$n * input$size_ratio_norm) - 2))) p_theo <- power.t.test(n = input$n[1]:input$n[2], delta = abs(input$scale1 * (input$skew1/sqrt(1 + input$skew1^2)) - (input$effect_size_norm + input$scale2 * (input$skew2/sqrt(1 + input$skew2^2)))), sd = sd_pooled, .05)$power points(p_theo ~ I(input$n[1]:input$n[2]), type = "l") } if(input$distro_choosen == "Chi-Squared" && input$size_ratio_chisq == 1){ var_theo1 <- 2 * (input$df1 + 2 * input$ncp1) var_theo2 <- 2 * (input$df2 + 2 * input$ncp2) sd_pooled <- sqrt((((input$n * input$size_ratio_chisq -1)* var_theo1) + ((input$n -1) * var_theo2))/(input$n + ((input$n * input$size_ratio_chisq) - 2))) p_theo <- power.t.test(n = input$n[1]:input$n[2], delta = abs((input$df1 + input$ncp1) - (input$df2 + input$ncp2)), sd = sd_pooled, .05)$power points(p_theo ~ I(input$n[1]:input$n[2]), type = "l") } }) legend("bottomright", legend = c("T-test; Normal distributed", "T-test", "Welch-test", "Wilcox-test"), lty = 1, col = c("black", "red", "blue", "green"), bty = "n") }) output$power_setting <- renderUI({ if(input$distro_choosen == "Chi-Squared" & input$tabs_active == "Power"){ tagList( sliderInput("n", "Sample size Range", min = 2, max = 600, value = c(2, 150)), sliderInput("size_ratio_chisq", "Ratio of Sample Size (N1/N2)", min =1, max = 10, value = 1)) } else if(input$distro_choosen == "Skewed Normal" & input$tabs_active == "Power"){ tagList( sliderInput("n", "Sample size Range", min = 2, max = 600, value = c(2, 150)), sliderInput("size_ratio_norm", "Ratio of Sample Size (N1/N2)", min =1, max = 10, value = 1) ) } }) output$diff_means <- reactive({ if(input$distro_choosen == "Skewed Normal"){ round(abs(input$scale1 * (input$skew1/sqrt(1 + input$skew1^2)) - (input$effect_size_norm + input$scale2 * (input$skew2/sqrt(1 + input$skew2^2)))), 3) } else if(input$distro_choosen == "Chi-Squared"){ round(abs((input$df1 + input$ncp1) - (input$df2 + input$ncp2))) } }) output$distro_setting <- renderUI({ #adjust UI according to distro choosen if(input$distro_choosen == "Skewed Normal" ){ #s1 <- rsnorm(n1, 0, input$scale1, input$skew1) # location, scale, skew #s2 <- rsnorm(n2, input$effect_size, input$scale2, input$skew2) tagList( uiOutput("power_setting"), sliderInput("effect_size_norm", "Difference in Location", min =-20, max = 20, value = 2), tags$b("True Difference in means:"), textOutput("diff_means"), column(6, h4("Sample 1", aligne = "center"), sliderInput("scale1", "Scale", min =0.01, max = 20, value = 10), sliderInput("skew1", "Skewness", min =-10, max = 10, value = 0) ), column(6, h4("Sample 2", aligne = "center"), sliderInput("scale2", "Scale", min =0.01, max = 20, value = 10), sliderInput("skew2", "Skewness", min =-10, max = 10, value = 0) ) ) }else if(input$distro_choosen == "Chi-Squared"){ #s1 <- rexp(n1, input$rate1) # rate #s2 <- rexp(n2, input$rate2) # input$df1, input$ncp1) # df, non centrality paramter # s2 <- rchisq(n, input$df2, input$ncp2) tagList( uiOutput("power_setting"), tags$b("True Difference in means:"), textOutput("diff_means"), column(6, h4("Sample 1", aligne = "center"), sliderInput("df1", "Degrees of freedom", min =0, max = 20, value = 5), sliderInput("ncp1", "Non-Centrality parameter", min =0, max = 10, value = 0) ), column(6, h4("Sample 2", aligne = "center"), sliderInput("df2", "Degrees of freedom", min =0, max = 20, value = 5), sliderInput("ncp2", "Non-Centrality parameter", min =0, max = 10, value = 0) ) ) } }) output$distro_plot <- renderPlot({ #if(min(input$scale1, input$scale2) == 0){ # plot(c(1, 1), type = "n", axes = F, xlab = "", ylab = "") # text(1, 1, "Scale cannot be zero") #}else if(input$distro_choosen == "Skewed Normal"){ if(max(dsn(seq(-50, 50, by = 1), 0, input$scale1, input$skew1)) >= max(dsn(seq(-50, 50, by = 1), 0, input$scale2, input$skew2))){ curve(dsn(x, 0, input$scale1, input$skew1), from = (-.5 * max(input$scale1, input$scale2) * max(input$effect_size_norm, 10)), to = (.7 * max(input$scale1, input$scale2) * max(input$effect_size_norm, 10)), col = "red", ylab = "Density") # location, scale, skew curve(dsn(x, input$effect_size_norm, input$scale2, input$skew2), add = T) abline(v = input$scale1 * (input$skew1/sqrt(1 + input$skew1^2)) * sqrt(2/pi), col = "red", lty = 2) abline(v = input$effect_size_norm + input$scale2 * (input$skew2/sqrt(1 + input$skew2^2)) * sqrt(2/pi), col = "black", lty = 2) }else { curve(dsn(x, input$effect_size_norm, input$scale2, input$skew2), from = -.5 * max(input$scale1, input$scale2) * max(input$effect_size_norm, 10), to = .7 * max(input$scale1, input$scale2) * max(input$effect_size_norm, 10), ylab = "Density") # location, scale, skew curve(dsn(x, 0, input$scale1, input$skew1), add = T, col = "red") abline(v = input$scale1 * (input$skew1/sqrt(1 + input$skew1^2)) * sqrt(2/pi), col = "red", lty = 2) abline(v = input$effect_size_norm + input$scale2 * (input$skew2/sqrt(1 + input$skew2^2)) * sqrt(2/pi), col = "black", lty = 2) } }else if(input$distro_choosen == "Chi-Squared"){ curve(dchisq(x, input$df1, input$ncp1), from = 0, to = 2 * max(input$df1, input$df2), col = "red", ylab = "Density") # df, non centrality paramter curve(dchisq(x, input$df2, input$ncp2), add = T) abline(v = input$df1 + input$ncp1, col = "red") abline(v = input$df2 + input$ncp2) } legend("topright", legend = c("Sample 1", "Sample 2"), lty = c(1, 1), col = c("red", "black"), bty = "n") }) ######second app output$parameters <- renderUI({ if (input$type == "Quadratic"){ tagList( sliderInput("nlr", "Sample size", min=5, max=300, value = c(10, 100), step=20, round= TRUE), sliderInput("b0", "Value beta0", min=-30, max=30, value = 1, step=1, round=TRUE), sliderInput("b1", "Value beta1", min=-30, max=30, value = 1, step=1, round=TRUE), sliderInput("b2", "Value beta2", min=-30, max=30, value = 1, step=1, round=TRUE), sliderInput("s", "Value standard deviation of the error term", min=.001, max=20, value = 5, step=0.1, round=TRUE), sliderInput("alpha", "Value alpha", min=0, max=0.1, value = 0.05, step=.01, round=TRUE), sliderInput("reps", "Number of replications", min=0, max=1000, value = 100, step=20, round=TRUE) )} else{ tagList( sliderInput("n1", "Sample size", min=5, max=300, value =c(10, 100), step=20, round= TRUE), sliderInput("b01", "Value beta0", min=-30, max=30, value = 1, step=1, round=TRUE), sliderInput("b11", "Value beta1", min=-30, max=30, value = 1, step=1, round=TRUE), sliderInput("b21", "Value beta2", min=-30, max=30, value = 1, step=1, round=TRUE), sliderInput("b31", "Value beta3", min=-30, max=30, value = 1, step=1, round=TRUE), sliderInput("s1", "Value standard deviation of the error term", min=.001, max=20, value = 5, step=0.1, round=TRUE), sliderInput("alpha1", "Value alpha", min=0, max=0.1, value = 0.05, step=.01, round=TRUE), sliderInput("reps1", "Number of replications", min=0, max=1000, value = 100, step=20, round=TRUE) )} }) output$dis <- renderPlot({ if (input$type == "Quadratic"){ x <- seq(from = -1, to = 1, length.out=input$nlr[2] - input$nlr[1] +1) y <- input$b0 + input$b1*x + input$b2*(x^2) plot(x,y, type="l") points(x,y+rnorm(input$nlr[2] - input$nlr[1] +1,mean=0, sd=input$s))} else{ x <- seq(from = -1, to = 1, length.out =input$n1[2] - input$n1[1] +1) y <- input$b01 + input$b11*x + input$b21*(x^2) + input$b31*(x^3) plot(x,y, type="l") points(x,y+rnorm(input$n1[2] - input$n1[1] +1,mean=0, sd=input$s1))} abline(lm(y ~ x), col = "red") }) output$pow1 <- renderPlot({ if (input$type == "Quadratic"){ pow <- c() for(i in input$nlr[1]:input$nlr[2]){ pval <- replicate(input$reps,{ x <- seq(from = -1, to = 1, length.out = i) y <- input$b0 + input$b1*x + input$b2*(x^2) + rnorm(i,mean=0, sd=input$s) unlist(summary(lm(y ~ x)))$coefficients8 }) pow <- c(pow, mean(pval < input$alpha)) } plot(input$nlr[1]:input$nlr[2],pow, xlab = "Sample size", ylab = "Power")} else{ pow <- c() for(j in input$n1[1]:input$n1[2]){ pval <- replicate(input$reps1,{ x <- seq(from = -1, to = 1, length.out = j) y <- input$b01 + input$b11*x + input$b21*(x^2) + input$b31*(x^3) + rnorm(j,mean=0, sd=input$s1) unlist(summary(lm(y ~ x)))$coefficients8 }) pow <- c(pow, mean(pval < input$alpha1))} plot(input$n1[1]:input$n1[2],pow, xlab = "Sample size", ylab = "Power") } }) output$vio <- renderPlot({ if (input$type == "Quadratic"){ x <- seq(from = -1, to = 1, length.out=input$nlr[2] - input$nlr[1] + 1) y <- input$b0 + input$b1*x + input$b2*(x^2) + rnorm(input$nlr[2] - input$nlr[1] + 1, mean=0, sd=input$s) regression <- lm(y~x) par(mfrow = c(2, 2)) plot(regression)} else{ x <- seq(from = -1, to = 1, length.out=input$n1) y <- input$b01 + input$b11*x + input$b21*(x^2) + input$b31*(x^3) + rnorm(input$n1,mean=0, sd=input$s1) regression1 <- lm(y~x) par(mfrow = c(2, 2)) plot(regression1)} }) } shinyApp(ui,server)