Shiny--Code/samplesize/LevenesTest/app.R

# Tải các thư viện cần thiết
library(shiny)
library(bslib)
library(ggplot2)
library(shinycssloaders)
library(car) # Gói chứa hàm leveneTest

# --- Giao diện người dùng (UI) ---
ui <- fluidPage(
  theme = bs_theme(version = 5, bootswatch = "cerulean"),
  withMathJax(),

  titlePanel("Tính toán Cỡ mẫu cho Kiểm định Levene"),

  sidebarLayout(
    sidebarPanel(
      h4("Tham số đầu vào"),

      numericInput("k_groups",
                   label = "Số lượng nhóm (k):",
                   value = 3, min = 2, max = 10),

      numericInput("sd_ratio",
                   label = "Tỷ lệ độ lệch chuẩn (Effect size):",
                   value = 1.5, min = 1.1, step = 0.1),
      helpText("Đây là tỷ lệ giữa độ lệch chuẩn của nhóm 'khác biệt' so với các nhóm còn lại (có độ lệch chuẩn là 1). Giá trị càng lớn, sự khác biệt càng rõ rệt."),

      sliderInput("sig_level",
                  label = "Mức ý nghĩa (\\(\\alpha\\)):",
                  min = 0.01, max = 0.10, value = 0.05, step = 0.01),

      sliderInput("power",
                  label = "Power mong muốn (\\(1 - \\beta\\)):",
                  min = 0.50, max = 0.99, value = 0.80, step = 0.01),

      numericInput("n_sims",
                   label = "Số lần lặp mô phỏng:",
                   value = 500, min = 100, max = 5000),

      hr(),
      helpText("Kết quả chính và đồ thị Power sẽ tự động cập nhật sau 1 giây. Chuyển sang các tab khác để chạy phân tích sâu hơn.")
    ),

    mainPanel(
      tabsetPanel(
        id = "results_tabs",
        type = "pills",

        tabPanel(
          "Kết quả & Diễn giải",
          h4("Kết quả ước tính"),
          withSpinner(uiOutput("sample_size_output"), type = 6, color = "#007bff")
        ),

        tabPanel(
          "Đồ thị Power vs. Cỡ mẫu",
          h4("Mối quan hệ giữa Power và Cỡ mẫu"),
          withSpinner(plotOutput("power_plot"), type = 6, color = "#007bff")
        ),

        tabPanel(
          "Phân tích Effect Size",
          h4("Mối quan hệ giữa Cỡ mẫu và Effect Size"),
          p("Phân tích này cho thấy cỡ mẫu cần thiết thay đổi như thế nào khi mức độ khác biệt về phương sai thay đổi (với power và alpha được giữ cố định)."),
          actionButton("run_effect_analysis", "Chạy phân tích Effect Size", class = "btn-success mb-3"),
          withSpinner(plotOutput("effect_analysis_plot"), type = 6, color = "#007bff")
        ),

        tabPanel(
          "Giả thuyết và Phương pháp",
          h4("Giả thuyết của Kiểm định Levene"),
          p("Kiểm định Levene được sử dụng để kiểm tra giả định về tính đồng nhất của phương sai (homogeneity of variance) giữa hai hay nhiều nhóm."),
          p("$$H_0: \\sigma_1^2 = \\sigma_2^2 = \\dots = \\sigma_k^2$$"),
          p("$$H_a: \\text{Có ít nhất một cặp phương sai không bằng nhau}$$"),
          hr(),
          h4("Phương pháp tính toán"),
          p("Ứng dụng sử dụng phương pháp mô phỏng Monte Carlo để ước tính cỡ mẫu:"),
          tags$ol(
            tags$li("Với mỗi cỡ mẫu \\(n\\) (cho mỗi nhóm), chúng tôi tạo ra một số lượng lớn các bộ dữ liệu (ví dụ 500 bộ)."),
            tags$li("Trong mỗi bộ dữ liệu, dữ liệu cho \\(k-1\\) nhóm được tạo từ phân phối chuẩn với phương sai bằng 1, và một nhóm được tạo với phương sai bằng (effect size)². "),
            tags$li("Áp dụng kiểm định Levene cho từng bộ dữ liệu và ghi lại p-value."),
            tags$li("Power thực nghiệm được tính bằng tỷ lệ các kiểm định có p-value nhỏ hơn mức ý nghĩa \\(\\alpha\\)."),
            tags$li("Cỡ mẫu cần thiết là cỡ mẫu nhỏ nhất đạt được power mong muốn.")
          )
        )
      )
    )
  )
)

# --- Logic của máy chủ (Server) ---
server <- function(input, output, session) {

  # --- HÀM MÔ PHỎNG LÕI ---
  # Hàm này tìm cỡ mẫu cần thiết cho một bộ tham số cụ thể
  find_required_n <- function(target_power, sig_level, k_groups, sd_ratio, n_sims, progress = NULL) {
    sample_sizes <- seq(10, 500, by = 5)
    required_n <- NA

    for (i in 1:length(sample_sizes)) {
      n <- sample_sizes[i]

      p_values <- replicate(n_sims, {
        # Tạo dữ liệu cho k nhóm
        data_list <- lapply(1:k_groups, function(group_idx) {
          # Nhóm đầu tiên có sd khác biệt, các nhóm còn lại có sd = 1
          current_sd <- if (group_idx == 1) sd_ratio else 1
          rnorm(n, mean = 0, sd = current_sd)
        })

        # Chuyển thành dataframe phù hợp cho leveneTest
        df <- data.frame(
          value = unlist(data_list),
          group = factor(rep(1:k_groups, each = n))
        )

        # Thực hiện kiểm định Levene
        leveneTest(value ~ group, data = df)$`Pr(>F)`[1]
      })

      current_power <- mean(p_values < sig_level, na.rm = TRUE)

      if (!is.null(progress)) {
        progress$inc(1/length(sample_sizes), detail = paste("Cỡ mẫu n =", n, "/ Power =", round(current_power, 2)))
      }

      if (current_power >= target_power) {
        required_n <- n
        break
      }
    }
    return(required_n)
  }

  # --- PHẦN TÍNH TOÁN CHÍNH (TỰ ĐỘNG) ---
  reactive_inputs <- reactive({
    list(
      sig_level = input$sig_level,
      power = input$power,
      k_groups = input$k_groups,
      sd_ratio = input$sd_ratio,
      n_sims = input$n_sims
    )
  })

  debounced_inputs <- debounce(reactive_inputs, 1000)

  simulation_results <- reactive({
    inputs <- debounced_inputs()

    sample_sizes <- seq(10, 500, by = 5)
    powers <- numeric(length(sample_sizes))

    withProgress(message = 'Đang chạy mô phỏng chính...', value = 0, {
      for (i in 1:length(sample_sizes)) {
        n <- sample_sizes[i]
        p_values <- replicate(inputs$n_sims, {
          data_list <- lapply(1:inputs$k_groups, function(j) {
            current_sd <- if (j == 1) inputs$sd_ratio else 1
            rnorm(n, mean = 0, sd = current_sd)
          })
          df <- data.frame(value = unlist(data_list), group = factor(rep(1:inputs$k_groups, each = n)))
          leveneTest(value ~ group, data = df)$`Pr(>F)`[1]
        })
        powers[i] <- mean(p_values < inputs$sig_level, na.rm = TRUE)
        incProgress(1/length(sample_sizes), detail = paste("Cỡ mẫu n =", n))
        if (powers[i] >= inputs$power) {
          sample_sizes <- sample_sizes[1:i]
          powers <- powers[1:i]
          break
        }
      }
    })

    list(
      sample_sizes = sample_sizes,
      powers = powers,
      inputs = inputs
    )
  })

  output$sample_size_output <- renderUI({
    res <- simulation_results()
    first_index_above_power <- which(res$powers >= res$inputs$power)[1]

    if (!is.na(first_index_above_power)) {
      required_n <- res$sample_sizes[first_index_above_power]
      tagList(
        tags$p("Để phát hiện sự khác biệt về phương sai (tỷ lệ SD =", tags$b(res$inputs$sd_ratio), ") giữa", tags$b(res$inputs$k_groups), "nhóm với power là", tags$b(res$inputs$power), "và mức ý nghĩa", tags$b(res$inputs$sig_level), ", bạn cần một cỡ mẫu ước tính là:"),
        tags$h3(style = "color: #007bff; text-align: center;", paste(required_n, "cho mỗi nhóm")),
        tags$p(style = "text-align: center; font-style: italic;", "Tổng cỡ mẫu là ", required_n * res$inputs$k_groups, ".")
      )
    } else {
      tags$div(class = "alert alert-warning", "Không đạt được power mong muốn trong khoảng cỡ mẫu đã thử (tối đa 500). Hãy thử tăng effect size hoặc giảm power mong muốn.")
    }
  })

  output$power_plot <- renderPlot({
    res <- simulation_results()
    plot_data <- data.frame(SampleSize = res$sample_sizes, Power = res$powers)
    ggplot(plot_data, aes(x = SampleSize, y = Power)) +
      geom_line(color = "#007bff", size = 1.2) + geom_point(color = "#007bff", size = 3) +
      geom_hline(yintercept = res$inputs$power, linetype = "dashed", color = "red") +
      annotate("text", x = max(res$sample_sizes) * 0.1, y = res$inputs$power + 0.04, label = paste("Power mục tiêu =", res$inputs$power), color = "red") +
      labs(title = paste("Power vs. Cỡ mẫu (cho mỗi nhóm)"), x = "Cỡ mẫu cho mỗi nhóm (n)", y = "Power thực nghiệm (1 - β)") +
      scale_y_continuous(limits = c(0, 1)) + theme_minimal(base_size = 14)
  })

  # --- PHẦN PHÂN TÍCH EFFECT SIZE ---
  effect_analysis_results <- eventReactive(input$run_effect_analysis, {
    inputs <- isolate(reactive_inputs()) # Lấy giá trị hiện tại, không cần reactive

    effect_sizes <- seq(1.2, 2.5, by = 0.1)
    required_n_values <- numeric(length(effect_sizes))

    withProgress(message = 'Đang chạy phân tích Effect Size...', value = 0, {
      for (i in 1:length(effect_sizes)) {
        current_sd_ratio <- effect_sizes[i]
        # Sử dụng hàm lõi để tìm cỡ mẫu
        required_n_values[i] <- find_required_n(
          target_power = inputs$power,
          sig_level = inputs$sig_level,
          k_groups = inputs$k_groups,
          sd_ratio = current_sd_ratio,
          n_sims = inputs$n_sims,
          progress = NULL # Không cần progress lồng nhau
        )
        incProgress(1/length(effect_sizes), detail = paste("Tỷ lệ SD =", current_sd_ratio))
      }
    })

    data.frame(EffectSize = effect_sizes, RequiredN = required_n_values)
  })

  output$effect_analysis_plot <- renderPlot({
    plot_data <- effect_analysis_results()

    ggplot(plot_data, aes(x = EffectSize, y = RequiredN)) +
      geom_line(color = "#28a745", size = 1.2) +
      geom_point(color = "#28a745", size = 3, na.rm = TRUE) +
      labs(
        title = paste("Cỡ mẫu cần thiết vs. Effect Size (Power cố định =", isolate(input$power), ")"),
        x = "Tỷ lệ Độ lệch chuẩn (Effect Size)",
        y = "Cỡ mẫu cần thiết cho mỗi nhóm (n)"
      ) +
      theme_minimal(base_size = 14)
  })
}

# Chạy ứng dụng Shiny
shinyApp(ui = ui, server = server)