# -------------------------------------------------------------------
# Principal Components Cartography of TIMSS Italy (Grade 8, 2019)
# Author: Christopher Irwin
#
# License: CC BY-NC 4.0 (Creative Commons Attribution-NonCommercial 4.0)
#          https://creativecommons.org/licenses/by-nc/4.0/
#
# Permissions:
#   You may copy, modify, and share this code for research and educational use.
#   You may NOT use this code for commercial purposes without explicit permission.
#
# Suggested citation:
# Irwin, Christopher. (2025). Principal Components Cartography 
# of TIMSS Italy (Grade 8, 2019). GitHub repository. 
# https://github.com/postquant/scripts
# -------------------------------------------------------------------


library(dplyr)
library(broom)
library(tidyverse)
library(patchwork)
library(knitr)

# Pre-processing step:
# The 2019 TIMSS 8th Grade Italy student data file is named 'bsgitab7'
# Load bsgitab7 as a dataframe

 
# Rename dataframe
timss_italy <- bsgitab7

# Subset to demographic, orienting, and affective variables
timss_italy <- timss_italy |>
    select(
      # School and student identifiers
      IDSCHOOL, IDSTUD,
      
      # Demographic variables
      BSBG01,      # Gender
      BSBG03,      # Speak [test language] at home
      BSBG06A,     # Parent A education
      BSBG06B,     # Parent B education
      BSBG07,      # Students' expected level of educational attainment
      BSBG08A,     # Parent A born in country
      BSBG08B,     # Parent B born in country
      BSBG09A,     # Student born in country
      BSBG09B,     # Age of student immigration
      
      # School factors
      BSBG13D,      # Teachers are fair
      BSBS21,      # Frequency of science experiments
      
      # Multiple variable groups using starts_with
      starts_with(c("BSBG12", # internet use for learning
                    "BSBG13", # attitudes toward school
                    "BSBS22", # attitudes toward learning science 
                    "BSBS23", # science teaching
                    "BSBS24", # science self-efficacy
                    "BSBS25"  # value of science
                    )),
      
      # Scale scores and derived variables
      BSBGHER,     # Home Educational Resources scale
      BSBGSSB,     # Student Sense of School Belonging scale
      BSBGSLS,     # Students Like Learning scale
      BSBGSCS,     # Student Confidence scale
      BSBGSVS      # Student Values scale
    ) |>
  mutate( 
    #orienting
    immigrant_student = if_else(BSBG09A == 2, 1, 0),
    science_confidence = BSBGSCS,
    value_of_science = BSBGSVS,
    
    #demographic
    female = if_else(BSBG01 == 1, 1, 0),
    education_pA = BSBG06A,
    education_pB = BSBG06B,
    immigrant_pA = BSBG08A,
    immigrant_pB = BSBG08B,
    same_home_language = BSBG03,
    home_resources = BSBGHER,
    
    #general affect
    science_learning_attitude = BSBGSLS,
    expected_attainment = BSBG07,
    
    enjoy_science = BSBS22A,
    wish_no_science = BSBS22B,
    science_boring = BSBS22C,
    science_interesting = BSBS22D,
    like_science = BSBS22E,
    look_forward_science = BSBS22F,
    science_favorite = BSBS22I,
    do_well_science = BSBS24A,
    science_more_difficult = BSBS24B,
    science_not_strength = BSBS24C,
    quick_learn_science = BSBS24D,
    good_difficult_problems = BSBS24E,
    science_hardest_subject = BSBS24G,
    science_is_confusing = BSBS24H,
    important_to_parents = BSBS25H,
    important_to_me = BSBS25I,
    belonging = BSBGSSB,
    
    #specific affective
    teachers_are_fair = BSBG13D,
    experiments_frequency = BSBS21,
    science_world_works = BSBS22G,
    like_experiments = BSBS22H,
    clear_expectations = BSBS23A,
    understand_teacher = BSBS23B,
    clear_answers = BSBS23C,
    good_at_explaining = BSBS23D,
    variety_to_learn = BSBS23E,
    link_to_already_know = BSBS23F,
    explains_again = BSBS23G,
    recognized_as_good = BSBS24F,
    helps_daily_life = BSBS25A,
    helps_learn_other_subjects = BSBS25B,
    helps_college = BSBS25C,
    helps_career = BSBS25D,
    want_job_with_science = BSBS25E,
    helps_get_ahead = BSBS25F,
    creates_more_opportunity = BSBS25G
    ) |>
  select(-starts_with("BS")) |>
  select(-IDSCHOOL, -IDSTUD)


# Handle missingness via listwise deletion.
# This anlaysis was conducted to avoid layering on artificial "plausible" 
# reponse data, and to let the existing data show alignnments that might 
# spark actionably strategies
timss_italy <- timss_italy |>
  
  na.omit()

# Scale data and generate the Principal Components
timss_italy <- timss_italy |>
  scale() |>
  prcomp()

# Extract the rotation matrix
rotation_matrix <- timss_italy$rotation

# Convert the rotation matrix to a data frame
rotation_df <- as.data.frame(rotation_matrix)

# Function to find PCs with same signs across selected variables
check_variable_signs <- function(rotation_matrix, variables) {
  # Convert to data frame if not already
  rotation_matrix <- as.data.frame(rotation_matrix)
  
  # Ensure all variables exist in the row names
  if (!all(variables %in% rownames(rotation_matrix))) {
    missing_vars <- variables[!variables %in% rownames(rotation_matrix)]
    stop("The following variables are not in the rotation matrix: ", 
         paste(missing_vars, collapse = ", "))
  }
  
  # Extract the subset of the rotation matrix for our variables of interest
  subset_matrix <- rotation_matrix[variables, , drop = FALSE]
  
  # Function to check if all signs are the same in a vector
  same_signs <- function(x) {
    all(x >= 0) || all(x <= 0)
  }
  
  # Check each PC for sign consistency
  consistent_pcs <- sapply(subset_matrix, same_signs)
  
  # Get the PC names where signs are consistent
  pcs_with_same_signs <- names(consistent_pcs)[consistent_pcs]
  
  # Create a more detailed output
  result <- list(
    consistent_pcs = pcs_with_same_signs,
    sign_patterns = sapply(pcs_with_same_signs, function(pc) {
      ifelse(subset_matrix[, pc][1] >= 0, "positive", "negative")
    }),
    loadings = subset_matrix[, pcs_with_same_signs, drop = FALSE]
  )
  
  return(result)
}

# Get PCs that are aligns for science confidence, value of science, and immigrant student
result <- check_variable_signs(
  rotation_matrix = rotation_df,
  variables = c(
    "science_confidence", 
    "value_of_science", 
    "immigrant_student")
)

# View results
print("PCs with consistent signs:")
print(result$consistent_pcs)

print("\nSign patterns:")
print(result$sign_patterns)

print("\nLoadings for these PCs:")
print(result$loadings)

#####################################################################
######### VISUALIZE PCs with BIPLOTS ################################
#####################################################################

arrow_style <- arrow(
  angle = 20, length = grid::unit(8, "pt"),
  ends = "first", type = "closed"
)


timss_tidy <- timss_italy |> 
  tidy(matrix = "rotation") |> 
  pivot_wider(
    names_from = "PC", values_from = "value",
    names_prefix = "PC"
  )


# # Create a new column for custom labels with multiple conditions
timss_tidy$custom_label <- case_when(
    timss_tidy$column == "female" ~ "female",
    timss_tidy$column == "immigrant_student" ~ "born outside of the country",
    timss_tidy$column == "science_confidence" ~ "science confidence",
    timss_tidy$column == "value_of_science" ~ "student's value of science",
    timss_tidy$column == "immigrant_pA" ~ "immigrant parent",
    timss_tidy$column == "immigrant_pB" ~ "immigrant parent",
    timss_tidy$column == "education_pA" ~ "parent education",
    timss_tidy$column == "education_pB" ~ "parent education",
    timss_tidy$column == "expected_attainment" ~ "student expected educational attainment",
    timss_tidy$column == "same_home_language" ~ "same home and instruction language",
    timss_tidy$column == "science_learning_attitude" ~ "positive attitude toward science",
    timss_tidy$column == "teachers_are_fair" ~ "teacher fairness",
    timss_tidy$column == "enjoy_science" ~ "science enjoyment",
    timss_tidy$column == "wish_no_science" ~ "wish to avoid science",
    timss_tidy$column == "science_boring" ~ "science is boring",
    timss_tidy$column == "science_interesting" ~ "science is interesting",
    timss_tidy$column == "like_science" ~ "I like science",
    timss_tidy$column == "look_forward_science"  ~ "look forward to science",
    timss_tidy$column == "science_world_works" ~ "science teaches how things work",
    timss_tidy$column == "like_experiments"  ~ "like experiments",
    timss_tidy$column == "experiments_frequency" ~ "frequent science experiments",
    timss_tidy$column == "science_favorite"  ~ "science favorite subject",
    timss_tidy$column == "clear_expectations" ~ "clear classroom expectations",
    timss_tidy$column == "understand_teacher"  ~ "teacher easy to understand",
    timss_tidy$column == "clear_answers"  ~ "teacher has clear answers",
    timss_tidy$column == "good_at_explaining"  ~ "teacher is good at explaining",
    timss_tidy$column == "variety_to_learn" ~ "variety of teaching strategies",
    timss_tidy$column == "link_to_already_know" ~ "lessons link to prior knowledge",
    timss_tidy$column == "explains_again"  ~ "teacher explains multiple times",
    timss_tidy$column == "do_well_science"  ~ "I do well in science",
    timss_tidy$column == "science_more_difficult"  ~ "greater science difficulty than peers",
    timss_tidy$column == "science_not_strength"  ~ "science is not my strength",
    timss_tidy$column == "quick_learn_science"  ~ "I learn science quickly",
    timss_tidy$column == "good_difficult_problems"  ~ "I am good at difficult problems",
    timss_tidy$column == "recognized_as_good" ~ "my teacher tells me I am good at science",
    timss_tidy$column == "science_is_confusing"  ~ "science makes me confused",
    timss_tidy$column == "science_hardest_subject"  ~ "science more difficult than other subjects",
    timss_tidy$column == "helps_daily_life" ~ "science as helpful daily",
    timss_tidy$column == "helps_learn_other_subjects" ~ "science necessary for other subjects",
    timss_tidy$column == "helps_college"  ~ "science necessary for college admission",
    timss_tidy$column == "helps_career" ~ "science necessary for desired careers",
    timss_tidy$column == "want_job_with_science"  ~ "I want a job that involves science",
    timss_tidy$column == "helps_get_ahead"  ~ "science important for getting ahead",
    timss_tidy$column == "creates_more_opportunity" ~ "learning science creates future opportunity",
    timss_tidy$column == "important_to_parents"  ~ "important to parents",
    timss_tidy$column == "important_to_me"  ~ "important to do well in science",
    timss_tidy$column == "home_resources"  ~ "home resources",
    timss_tidy$column == "belonging"  ~ "sense of school belonging",

  TRUE ~ ""
)


# temporary labels to facilitate initial analysis of plots
#timss_tidy$custom_label <- match(pca_tidy$column, unique(pca_tidy$column))


# Vectors for variable groupings

orienting_factors <- c("immigrant_student", "science_confidence", "value_of_science")

demographic <- c("female", "education_pA", "education_pB",
                 "immigrant_pA", "immigrant_pB", "same_home_language", "home_resources")

general_affective <- c("belonging", "expected_attainment", 
                       
                       "look_forward_science", "science_favorite", 
                      "do_well_science", "science_more_difficult", "science_not_strength", 
                      "enjoy_science", "wish_no_science", "science_boring", "science_interesting", "like_science",
                       "quick_learn_science",
                       "good_difficult_problems", "science_hardest_subject", "science_is_confusing",
                       "important_to_parents", "important_to_me", "science_learning_attitude" )

specific_affective <- c("teachers_are_fair", "experiments_frequency", "science_world_works", 
                        "clear_expectations", "like_experiments", "clear_expectations",
                        "understand_teacher", "clear_answers", "good_at_explaining",
                        "variety_to_learn", "link_to_already_know", "explains_again",
                        "recognized_as_good", "helps_daily_life", "helps_learn_other_subjects",
                        "helps_college", "helps_career", "want_job_with_science", 
                        "helps_get_ahead", "creates_more_opportunity")

#reality_check <- c("enjoy_science", "wish_no_science", "science_boring", "science_interesting", "like_science",
#                   "do_well_science", "science_more_difficult", "science_not_strength")

p_dem <- timss_tidy |>
  
  ggplot(aes(x = PC6, y = PC7)) +
  geom_segment(
    aes(xend = 0, yend = 0, 
        color = case_when(
          column %in% orienting_factors ~ "#000000",
          column %in% general_affective ~ "#0072B2",
          column %in% demographic ~  "#CC97A7",
          column %in% specific_affective ~ "#D55E00",
          TRUE ~ "#009E73"
        ),
        alpha = case_when(
          column %in% orienting_factors ~ 1,
          column %in% general_affective ~ 0,
          column %in% specific_affective ~ 0.0,
          column %in% demographic ~ 0.7,
          TRUE ~ 0.0
        )),
    arrow = arrow_style
  ) +
  geom_text(
    aes(label = custom_label, 
        color = case_when(
          column %in% orienting_factors ~ "#000000",
          column %in% general_affective ~ "#0072B2",
          column %in% demographic ~ "#CC79A7",
          column %in% specific_affective ~ "#D55E00",
          TRUE ~ "#56B4E9"
        ),
        alpha = case_when(
          
          column %in% orienting_factors ~ 1,
          column %in% general_affective ~ 0,
          column %in% specific_affective ~ .0,
          column %in% demographic ~ .7,
          TRUE ~ 0.30
        )), 
    hjust = -.01,
    vjust = 0.02,
    size = 5
  ) +
  scale_color_identity() + 
  scale_alpha_identity() +
  xlim(-0.5, 0.5) + ylim(-0.5, 0.5) +
  coord_fixed() +
  labs(title = "Detail of Biplot of Principal Components")

p_dem

p_new <- timss_tidy |>
  
  ggplot(aes(x = PC6, y = PC7)) +
  geom_segment(
    aes(xend = 0, yend = 0, 
        color = case_when(
          column %in% orienting_factors ~ "#000000",
          column %in% general_affective ~ "#0072B2",
          column %in% demographic ~  "#CC97A7",
          column %in% specific_affective ~ "#D55E00",
          TRUE ~ "#009E73"
        ),
        alpha = case_when(
          column %in% orienting_factors ~ 1,
          column %in% general_affective ~ 0.6,
          column %in% specific_affective ~ 0.0,
          column %in% demographic ~ 0.6,
          TRUE ~ 0.0
        )),
    arrow = arrow_style
  ) +
  geom_text(
    aes(label = custom_label, 
        color = case_when(
          column %in% orienting_factors ~ "#000000",
          column %in% general_affective ~ "#0072B2",
          column %in% demographic ~ "#CC79A7",
          column %in% specific_affective ~ "#D55E00",
          TRUE ~ "#56B4E9"
        ),
        alpha = case_when(
          
          column %in% orienting_factors ~ 1,
          column %in% general_affective ~ 0.6,
          column %in% specific_affective ~ .0,
          column %in% demographic ~ .6,
          TRUE ~ 0.30
        )), 
    hjust = -.01,
    vjust = 0.02,
    size = 5
  ) +
  scale_color_identity() + 
  scale_alpha_identity() +
  xlim(-0.2, 0.7) + ylim(-0.3, 0.4) +
  coord_fixed() +
  labs(title = "Detail of Biplot of Principal Components")

p_new

p_1Q <- timss_tidy |>
  
  ggplot(aes(x = PC32, y = PC41)) +
  geom_segment(
    aes(xend = 0, yend = 0, 
        color = case_when(
          column %in% orienting_factors ~ "#000000",
          column %in% general_affective ~ "#0072B2",
          column %in% demographic ~ "#CC79A7",
          column %in% specific_affective ~ "#D55E00",
          TRUE ~ "#009E73"
        ),
        alpha = case_when(
          column %in% orienting_factors ~ 1,
          column %in% general_affective ~ 0.5,
          column %in% specific_affective ~ 0.5,
          column %in% demographic ~ 0.5,
          TRUE ~ 0.0
        )),
    arrow = arrow_style
  ) +
  geom_text(
    aes(label = custom_label, 
        color = case_when(
          column %in% orienting_factors ~ "#000000",
          column %in% general_affective ~ "#0072B2",
          column %in% demographic ~ "#CC79A7",
          column %in% specific_affective ~ "#D55E00",
          TRUE ~ "#CC79A7"
        ),
        alpha = case_when(
          
          column %in% orienting_factors ~ 1,
          column %in% general_affective ~ 0.5,
          column %in% specific_affective ~ .5,
          column %in% demographic ~ .7,
          TRUE ~ 0.30
        )), 
    hjust = -.01,
    vjust = 0.02,
    size = 5
  ) +
  scale_color_identity() + 
  scale_alpha_identity() +
  xlim(-0, 0.6) + ylim(-0, 0.6) +
  coord_fixed() +
  labs(title = "Detail of Biplot of Principal Components")

p_1Q

p_2Q <- timss_tidy |>
  
  ggplot(aes(x = PC7, y = PC15)) +
  geom_segment(
    aes(xend = 0, yend = 0, 
        color = case_when(
          column %in% orienting_factors ~ "#000000",
          column %in% general_affective ~ "#0072B2",
          column %in% demographic ~ "#CC79A7",
          column %in% specific_affective ~ "#D55E00",
          TRUE ~ "#009E73"
        ),
        alpha = case_when(
          column %in% orienting_factors ~ 1,
          column %in% general_affective ~ 0.6,
          column %in% specific_affective ~ 0.5,
          column %in% demographic ~ 0.5,
          TRUE ~ 0.0
        )),
    arrow = arrow_style
  ) +
  geom_text(
    aes(label = custom_label, 
        color = case_when(
          column %in% orienting_factors ~ "#000000",
          column %in% general_affective ~ "#0072B2",
          column %in% demographic ~ "#CC79A7",
          column %in% specific_affective ~ "#D55E00",
          TRUE ~ "#CC79A7"
        ),
        alpha = case_when(
          
          column %in% orienting_factors ~ 1,
          column %in% general_affective ~ 0.6,
          column %in% specific_affective ~ .5,
          column %in% demographic ~ .7,
          TRUE ~ 0.30
        )), 
    hjust = .99,
    vjust = 0.8,
    size = 5
  ) +
  scale_color_identity() + 
  scale_alpha_identity() +
  xlim(-0.63, 0) + ylim(-0, 0.6) +
  coord_fixed() +
  labs(title = "Detail of Biplot of Principal Components")

p_2Q

p_3Q <- timss_tidy |>
  
  ggplot(aes(x = PC32, y = PC41)) +
  geom_segment(
    aes(xend = 0, yend = 0, 
        color = case_when(
          column %in% orienting_factors ~ "#000000",
          column %in% general_affective ~ "#0072B2",
          column %in% demographic ~ "#CC79A7",
          column %in% specific_affective ~ "#D55E00",
          TRUE ~ "#009E73"
        ),
        alpha = case_when(
          column %in% orienting_factors ~ 1,
          column %in% general_affective ~ 0.6,
          column %in% specific_affective ~ 0.5,
          column %in% demographic ~ 0.5,
          TRUE ~ 0.0
        )),
    arrow = arrow_style
  ) +
  geom_text(
    aes(label = custom_label, 
        color = case_when(
          column %in% orienting_factors ~ "#000000",
          column %in% general_affective ~ "#0072B2",
          column %in% demographic ~ "#CC79A7",
          column %in% specific_affective ~ "#D55E00",
          TRUE ~ "#CC79A7"
        ),
        alpha = case_when(
          
          column %in% orienting_factors ~ 1,
          column %in% general_affective ~ 0.6,
          column %in% specific_affective ~ .5,
          column %in% demographic ~ .7,
          TRUE ~ 0.30
        )), 
    hjust = .99,
    vjust = 0.8,
    size = 5
  ) +
  scale_color_identity() + 
  scale_alpha_identity() +
  xlim(-0.63, 0) + ylim(-0.3, 0) +
  coord_fixed() +
  labs(title = "Detail of Biplot of Principal Components")

p_3Q

p_4Q <- timss_tidy |>
  
  ggplot(aes(x = PC15, y = PC41)) +
  geom_segment(
    aes(xend = 0, yend = 0, 
        color = case_when(
          column %in% orienting_factors ~ "#000000",
          column %in% general_affective ~ "#0072B2",
          column %in% demographic ~ "#CC79A7",
          column %in% specific_affective ~ "#D55E00",
          TRUE ~ "#009E73"
        ),
        alpha = case_when(
          column %in% orienting_factors ~ 1,
          column %in% general_affective ~ 0.6,
          column %in% specific_affective ~ 0.5,
          column %in% demographic ~ 0.7,
          TRUE ~ 0.0
        )),
    arrow = arrow_style
  ) +
  geom_text(
    aes(label = custom_label, 
        color = case_when(
          column %in% orienting_factors ~ "#000000",
          column %in% general_affective ~ "#0072B2",
          column %in% demographic ~ "#CC79A7",
          column %in% specific_affective ~ "#D55E00",
          TRUE ~ "#CC79A7"
        ),
        alpha = case_when(
          
          column %in% orienting_factors ~ 1,
          column %in% general_affective ~ 0.6,
          column %in% specific_affective ~ .5,
          column %in% demographic ~ .7,
          TRUE ~ 0.30
        )), 
    hjust = -.01,
    vjust = 0.02,
    size = 5
  ) +
  scale_color_identity() + 
  scale_alpha_identity() +
  xlim(-0, 0.6) + ylim(-0.5, 0) +
  coord_fixed() +
  labs(title = "Detail of Biplot of Principal Components")

p_4Q


