# -------------------------------------------------------------------
# Principal Components Cartography of TIMSS Italy (Grade 8, 2019)
# Author: Christopher Irwin
#
# License: CC BY-NC 4.0 (Creative Commons Attribution-NonCommercial 4.0)
#          https://creativecommons.org/licenses/by-nc/4.0/
#
# Permissions:
#   You may copy, modify, and share this code for research and educational use.
#   You may NOT use this code for commercial purposes without explicit permission.
#
# Suggested citation:
# Irwin, Christopher. (2025). Principal Components Cartography 
# of TIMSS Italy (Grade 8, 2025). GitHub repository. 
# https://github.com/postquant/scripts
# -------------------------------------------------------------------

# --- Required packages
library(dplyr)
library(tidyr)
library(haven)

# =========================
# TIMSS 2023 (Grade 8, ITA)
# =========================
timss_it_2023 <- BSGITAM8 |>
  mutate(
    # orienting
    immigrant_student     = if_else(BSBG09A == 2, 1L, 0L),
    science_confidence    = BSBGSCS,
    value_of_science      = BSBGSVS,
    
    # demographic
    female                = if_else(BSBG01 == 1, 1L, 0L),
    education_pA          = BSBG06A,
    education_pB          = BSBG06B,
    immigrant_pA          = BSBG08A,
    immigrant_pB          = BSBG08B,
    same_home_language    = BSBG03,
    home_resources        = BSBGHER,
    
    # general affect
    science_learning_attitude = BSBGSLS,
    expected_attainment       = BSBG07,
    
    enjoy_science         = BSBS25A,
    wish_no_science       = BSBS25B,
    science_boring        = BSBS25C,
    science_interesting   = BSBS25D,
    like_science          = BSBS25E,
    look_forward_science  = BSBS25F,
    science_favorite      = BSBS25I,
    
    do_well_science       = BSBS28A,
    science_more_difficult= BSBS28B,
    science_not_strength  = BSBS28C,
    quick_learn_science   = BSBS28D,
    good_difficult_problems = BSBS28E,
    i_explain_well        = BSBS28F,
    science_hardest_subject = BSBS28G,
    science_is_confusing  = BSBS28H,
    
    important_to_parents  = BSBS29H,
    important_to_me       = BSBS29I,
    belonging             = BSBGSSB,
    
    # specific affective
    teachers_care         = BSBG16D,
    experiments_frequency = BSBS24,
    science_world_works   = BSBS25G,
    like_experiments      = BSBS25H,
    
    clear_expectations    = BSBS26A,
    understand_teacher    = BSBS26B,
    clear_answers         = BSBS26C,
    good_at_explaining    = BSBS26D,
    variety_to_learn      = BSBS26E,
    explains_again        = BSBS26F,
    helpful_feedback      = BSBS26G,
    show_learning         = BSBS26H,
    explain_answers       = BSBS26I,
    apply_situations      = BSBS26J,
    
    helps_daily_life         = BSBS29A,
    helps_learn_other_subjects = BSBS29B,
    helps_college            = BSBS29C,
    helps_career             = BSBS29D,
    want_job_with_science    = BSBS29E,
    helps_get_ahead          = BSBS29F,
    creates_more_opportunity = BSBS29G
  ) |>
  select(
    immigrant_student, science_confidence, value_of_science,
    female, education_pA, education_pB, immigrant_pA, immigrant_pB,
    same_home_language, home_resources,
    science_learning_attitude, expected_attainment,
    enjoy_science, wish_no_science, science_boring, science_interesting,
    like_science, look_forward_science, science_favorite,
    do_well_science, science_more_difficult, science_not_strength, quick_learn_science,
    good_difficult_problems, science_hardest_subject, science_is_confusing,
    important_to_parents, important_to_me, belonging,
    teachers_care, experiments_frequency, science_world_works, like_experiments,
    clear_expectations, understand_teacher, clear_answers, good_at_explaining,
    variety_to_learn, explains_again, helpful_feedback, show_learning,
    explain_answers, apply_situations, i_explain_well,
    helps_daily_life, helps_learn_other_subjects, helps_college, helps_career,
    want_job_with_science, helps_get_ahead, creates_more_opportunity
  )

# =========================
# TIMSS 2019 (Grade 8, ITA)
# =========================
timss_it_2019 <- bsgitab7 |>
  # pull only what we need to derive standardized names
  dplyr::select(
    IDSCHOOL, IDSTUD,
    BSBG01, BSBG03, BSBG06A, BSBG06B, BSBG07, BSBG08A, BSBG08B, BSBG09A, BSBG09B,
    BSBG13D,    # teachers are fair (note: not same as 2023 'teachers_care')
    BSBS21,     # experiments frequency
    dplyr::starts_with(c("BSBG12","BSBG13","BSBS22","BSBS23","BSBS24","BSBS25")),
    BSBGHER, BSBGSSB, BSBGSLS, BSBGSCS, BSBGSVS
  ) |>
  dplyr::mutate(
    # orienting
    immigrant_student = if_else(BSBG09A == 2, 1L, 0L),
    science_confidence = BSBGSCS,
    value_of_science   = BSBGSVS,
    
    # demographic
    female = if_else(BSBG01 == 1, 1L, 0L),
    education_pA = BSBG06A,
    education_pB = BSBG06B,
    immigrant_pA = BSBG08A,
    immigrant_pB = BSBG08B,
    same_home_language = BSBG03,
    home_resources = BSBGHER,
    
    # general affect
    science_learning_attitude = BSBGSLS,
    expected_attainment = BSBG07,
    
    enjoy_science = BSBS22A,
    wish_no_science = BSBS22B,
    science_boring = BSBS22C,
    science_interesting = BSBS22D,
    like_science = BSBS22E,
    look_forward_science = BSBS22F,
    science_favorite = BSBS22I,
    
    do_well_science = BSBS24A,
    science_more_difficult = BSBS24B,
    science_not_strength = BSBS24C,
    quick_learn_science = BSBS24D,
    good_difficult_problems = BSBS24E,
    science_hardest_subject = BSBS24G,
    science_is_confusing = BSBS24H,
    
    important_to_parents = BSBS25H,
    important_to_me = BSBS25I,
    belonging = BSBGSSB,
    
    # specific affective (note: some names differ from 2023 and thus won't be in common set)
    teachers_are_fair     = BSBG13D,
    experiments_frequency = BSBS21,
    science_world_works   = BSBS22G,
    like_experiments      = BSBS22H,
    clear_expectations    = BSBS23A,
    understand_teacher    = BSBS23B,
    clear_answers         = BSBS23C,
    good_at_explaining    = BSBS23D,
    variety_to_learn      = BSBS23E,
    link_to_already_know  = BSBS23F,
    explains_again        = BSBS23G,
    recognized_as_good    = BSBS24F,
    
    helps_daily_life         = BSBS25A,
    helps_learn_other_subjects = BSBS25B,
    helps_college            = BSBS25C,
    helps_career             = BSBS25D,
    want_job_with_science    = BSBS25E,
    helps_get_ahead          = BSBS25F,
    creates_more_opportunity = BSBS25G
  ) |>
  # keep only standardized names; drop raw TIMSS codes/IDs
  dplyr::select(
    immigrant_student, science_confidence, value_of_science,
    female, education_pA, education_pB, immigrant_pA, immigrant_pB,
    same_home_language, home_resources,
    science_learning_attitude, expected_attainment,
    enjoy_science, wish_no_science, science_boring, science_interesting,
    like_science, look_forward_science, science_favorite,
    do_well_science, science_more_difficult, science_not_strength, quick_learn_science,
    good_difficult_problems, science_hardest_subject, science_is_confusing,
    important_to_parents, important_to_me, belonging,
    # specific affective (note: teachers_are_fair name differs from 2023 'teachers_care')
    teachers_are_fair, experiments_frequency, science_world_works, like_experiments,
    clear_expectations, understand_teacher, clear_answers, good_at_explaining,
    variety_to_learn, link_to_already_know, explains_again, recognized_as_good,
    helps_daily_life, helps_learn_other_subjects, helps_college, helps_career,
    want_job_with_science, helps_get_ahead, creates_more_opportunity
  )

# ==========================================================
# Find common variables by standardized names & combine
# ==========================================================

# after you build timss_it_2019 and timss_it_2023:
timss_it_2019 <- haven::zap_labels(timss_it_2019)
timss_it_2023 <- haven::zap_labels(timss_it_2023)

common_vars <- intersect(names(timss_it_2019), names(timss_it_2023))

timss_it_both <- dplyr::bind_rows(
  timss_it_2019 |> dplyr::mutate(year = 2019) |> dplyr::select(year, dplyr::all_of(common_vars)),
  timss_it_2023 |> dplyr::mutate(year = 2023) |> dplyr::select(year, dplyr::all_of(common_vars))
)

# ==========================================================
# Build the analysis matrix from both years combined
# ==========================================================
timss_analysis <- timss_it_both |>
  dplyr::select(-year) |>        # drop the year column, keep all predictors
  tidyr::drop_na()               # listwise deletion for missing data

# Scale data and generate the Principal Components
pca_raw <- timss_analysis |>
  scale() |>
  prcomp()

# ==========================================================
# Check + flip PCs so orienting vars are positive when all-negative
# ==========================================================
align_and_report_pcs <- function(pca_obj, orienting_vars) {
  stopifnot(is.list(pca_obj), !is.null(pca_obj$rotation))
  rot <- pca_obj$rotation
  
  if (!all(orienting_vars %in% rownames(rot))) {
    missing_vars <- orienting_vars[!orienting_vars %in% rownames(rot)]
    stop("align_and_report_pcs(): missing in rotation: ",
         paste(missing_vars, collapse = ", "))
  }
  
  subrot <- rot[orienting_vars, , drop = FALSE]
  
  same_sign    <- function(v) all(v >= 0) || all(v <= 0)
  all_negative <- function(v) all(v <= 0) && any(v < 0)
  all_positive <- function(v) all(v >= 0) && any(v > 0)
  
  pc_names <- colnames(subrot)
  report <- lapply(seq_along(pc_names), function(j) {
    v <- subrot[, j]
    data.frame(
      pc              = pc_names[j],
      consistent_sign = same_sign(v),
      sign_pattern    = if (all_negative(v)) "all_negative"
      else if (all_positive(v)) "all_positive"
      else "mixed",
      min_loading     = min(v),
      max_loading     = max(v),
      stringsAsFactors = FALSE
    )
  }) |> dplyr::bind_rows()
  
  # Flip PCs that are consistent AND all-negative on orienting vars
  flip_idx <- which(report$consistent_sign & report$sign_pattern == "all_negative")
  flipped_pcs <- character(0)
  
  if (length(flip_idx) > 0) {
    pca_obj$rotation[, flip_idx] <- -pca_obj$rotation[, flip_idx]
    if (!is.null(pca_obj$x)) pca_obj$x[, flip_idx] <- -pca_obj$x[, flip_idx]
    flipped_pcs <- report$pc[flip_idx]
  }
  report$flipped <- report$pc %in% flipped_pcs
  
  # Diagnostics after potential flip
  rot2 <- pca_obj$rotation
  consistent_rows <- report$consistent_sign
  consistent_pcs <- report$pc[consistent_rows]
  sign_patterns  <- report$sign_pattern[consistent_rows]
  names(sign_patterns) <- consistent_pcs
  loadings <- if (length(consistent_pcs) > 0) {
    rot2[orienting_vars, consistent_pcs, drop = FALSE]
  } else {
    rot2[orienting_vars, 0, drop = FALSE]
  }
  
  list(
    pca             = pca_obj,
    report          = tibble::as_tibble(report),
    consistent_pcs  = consistent_pcs,
    sign_patterns   = sign_patterns,
    loadings        = loadings
  )
}

orienting_vars <- c("science_confidence", "value_of_science", "immigrant_student")
aligned <- align_and_report_pcs(pca_raw, orienting_vars)

cat("PCs with consistent signs:\n")
print(aligned$consistent_pcs)

cat("\nSign patterns:\n")
print(aligned$sign_patterns)

cat("\nLoadings for these PCs:\n")
print(aligned$loadings)

#####################################################################
######### VISUALIZE PCs with BIPLOTS ################################
#####################################################################

arrow_style <- grid::arrow(
  angle = 20, length = grid::unit(8, "pt"),
  ends = "first", type = "closed"
)

timss_tidy <- aligned$pca |>
  broom::tidy(matrix = "rotation") |>
  tidyr::pivot_wider(
    names_from = "PC", values_from = "value",
    names_prefix = "PC"
  )



# Create a new column for custom labels with multiple conditions
timss_tidy$custom_label <- dplyr::case_when(
  # Demographic / orienting
  timss_tidy$column == "female"               ~ "female",
  timss_tidy$column == "immigrant_student"    ~ "born outside the country",
  timss_tidy$column == "immigrant_pA"         ~ "immigrant parent",
  timss_tidy$column == "immigrant_pB"         ~ "immigrant parent",
  timss_tidy$column == "education_pA"         ~ "parent education",
  timss_tidy$column == "education_pB"         ~ "parent education",
  timss_tidy$column == "same_home_language"   ~ "same home and instruction language",
  timss_tidy$column == "home_resources"       ~ "home resources",
  
  # General affective / scales
  timss_tidy$column == "science_confidence"        ~ "science confidence",
  timss_tidy$column == "value_of_science"          ~ "student's value of science",
  timss_tidy$column == "expected_attainment"       ~ "student expected educational attainment",
  timss_tidy$column == "science_learning_attitude" ~ "positive attitude toward science",
  timss_tidy$column == "belonging"                 ~ "sense of school belonging",
  
  # Material classroom environment
  # timss_tidy$column == "dont_listen"        ~ "students don't listen",
  # timss_tidy$column == "noisy_class"        ~ "class is noisy",
  # timss_tidy$column == "wait_for_quiet"     ~ "teacher waits for quiet",
  # timss_tidy$column == "students_interrupt" ~ "students interrupt",
  # timss_tidy$column == "ignore_rules"       ~ "students ignore rules",
  # timss_tidy$column == "hard_concentrate"   ~ "hard to concentrate",
  # timss_tidy$column == "homework_quantity"  ~ "amount of science homework",
  
  # Enjoyment / interest / self-concept (BSBS25/28)
  timss_tidy$column == "enjoy_science"            ~ "science enjoyment",
  timss_tidy$column == "wish_no_science"          ~ "wish to avoid science",
  timss_tidy$column == "science_boring"           ~ "science is boring",
  timss_tidy$column == "science_interesting"      ~ "science is interesting",
  timss_tidy$column == "like_science"             ~ "I like science",
  timss_tidy$column == "look_forward_science"     ~ "look forward to science",
  timss_tidy$column == "science_favorite"         ~ "science favorite subject",
  timss_tidy$column == "do_well_science"          ~ "I do well in science",
  timss_tidy$column == "science_more_difficult"   ~ "greater science difficulty than peers",
  timss_tidy$column == "science_not_strength"     ~ "science is not my strength",
  timss_tidy$column == "quick_learn_science"      ~ "I learn science quickly",
  timss_tidy$column == "good_difficult_problems"  ~ "I am good at difficult problems",
  timss_tidy$column == "science_is_confusing"     ~ "science makes me confused",
  timss_tidy$column == "science_hardest_subject"  ~ "science more difficult than other subjects",
  timss_tidy$column == "i_explain_well"           ~ "I explain my science ideas well",
  
  # Teaching & classroom practices (BSBS24/26)
  timss_tidy$column == "experiments_frequency" ~ "frequency of science experiments",
  timss_tidy$column == "science_world_works"   ~ "science explains how the world works",
  timss_tidy$column == "like_experiments"      ~ "like experiments",
  timss_tidy$column == "teachers_care"         ~ "teachers care about me",
  timss_tidy$column == "clear_expectations"    ~ "clear classroom expectations",
  timss_tidy$column == "understand_teacher"    ~ "teacher easy to understand",
  timss_tidy$column == "clear_answers"         ~ "teacher gives clear answers",
  timss_tidy$column == "good_at_explaining"    ~ "teacher is good at explaining",
  timss_tidy$column == "variety_to_learn"      ~ "variety of teaching strategies",
  timss_tidy$column == "explains_again"        ~ "teacher explains multiple times",
  timss_tidy$column == "helpful_feedback"      ~ "helpful feedback",
  timss_tidy$column == "show_learning"         ~ "show what I learned",
  timss_tidy$column == "explain_answers"       ~ "teacher explains answers",
  timss_tidy$column == "apply_situations"      ~ "apply science to situations",
  
  # Value of science (BSBS29)
  timss_tidy$column == "helps_daily_life"          ~ "science helpful in daily life",
  timss_tidy$column == "helps_learn_other_subjects"~ "science necessary for other subjects",
  timss_tidy$column == "helps_college"             ~ "science necessary for college admission",
  timss_tidy$column == "helps_career"              ~ "science necessary for desired careers",
  timss_tidy$column == "want_job_with_science"     ~ "I want a job that involves science",
  timss_tidy$column == "helps_get_ahead"           ~ "science important for getting ahead",
  timss_tidy$column == "creates_more_opportunity"  ~ "learning science creates future opportunity",
  timss_tidy$column == "important_to_parents"      ~ "important to parents",
  timss_tidy$column == "important_to_me"           ~ "important to do well in science",
  
  TRUE ~ ""
)



# temporary labels to facilitate initial analysis of plots
#timss_tidy$custom_label <- match(timss_tidy$column, unique(timss_tidy$column))


# Vectors for variable groupings

orienting_factors <- c(
  "immigrant_student",
  "science_confidence",
  "value_of_science"
)

demographic <- c(
  "female", "education_pA", "education_pB",
  "immigrant_pA", "immigrant_pB",
  "same_home_language", "home_resources"
)

# material_classroom <- c(
#   "dont_listen", "noisy_class", "wait_for_quiet",
#   "students_interrupt", "ignore_rules",
#   "hard_concentrate", "homework_quantity"
# )

general_affective <- c(
  "belonging", "expected_attainment",
  "look_forward_science", "science_favorite",
  "do_well_science", "science_more_difficult", "science_not_strength",
  "enjoy_science", "wish_no_science", "science_boring", "science_interesting",
  "like_science", "quick_learn_science", "good_difficult_problems",
  "science_hardest_subject", "science_is_confusing",
  "important_to_parents", "important_to_me",
  "science_learning_attitude"
)

specific_affective <- c(
  "teachers_care", "experiments_frequency", "science_world_works",
  "like_experiments",
  "clear_expectations", "understand_teacher", "clear_answers",
  "good_at_explaining", "variety_to_learn", "explains_again",
  "helpful_feedback", "show_learning", "explain_answers", "apply_situations",
  "i_explain_well",
  "helps_daily_life", "helps_learn_other_subjects", "helps_college",
  "helps_career", "want_job_with_science", "helps_get_ahead",
  "creates_more_opportunity"
)


