# Raw-CSV URLs on GitHub for the yearly Fragile States Index files, one per
# year from 2006 (Year1) through 2023 (Year18).
Year1 <- "https://github.com/Fundamentals-Sarah/HW5Repo1/raw/refs/heads/main/fsi-2006.csv"
Year2 <- "https://github.com/Fundamentals-Sarah/HW5Repo1/raw/refs/heads/main/fsi-2007.csv"
Year3 <- "https://github.com/Fundamentals-Sarah/HW5Repo1/raw/refs/heads/main/fsi-2008.csv"
Year4 <- "https://github.com/Fundamentals-Sarah/HW5Repo1/raw/refs/heads/main/fsi-2009.csv"
Year5 <- "https://github.com/Fundamentals-Sarah/HW5Repo1/raw/refs/heads/main/fsi-2010.csv"
Year6 <- "https://github.com/Fundamentals-Sarah/HW5Repo1/raw/refs/heads/main/fsi-2011.csv"
Year7 <- "https://github.com/Fundamentals-Sarah/HW5Repo1/raw/refs/heads/main/fsi-2012.csv"
Year8 <- "https://github.com/Fundamentals-Sarah/HW5Repo1/raw/refs/heads/main/fsi-2013.csv"
Year9 <- "https://github.com/Fundamentals-Sarah/HW5Repo1/raw/refs/heads/main/fsi-2014.csv"
Year10 <- "https://github.com/Fundamentals-Sarah/HW5Repo1/raw/refs/heads/main/fsi-2015.csv"
Year11 <- "https://github.com/Fundamentals-Sarah/HW5Repo1/raw/refs/heads/main/fsi-2016.csv"
Year12 <- "https://github.com/Fundamentals-Sarah/HW5Repo1/raw/refs/heads/main/fsi-2017.csv"
Year13 <- "https://github.com/Fundamentals-Sarah/HW5Repo1/raw/refs/heads/main/fsi-2018.csv"
Year14 <- "https://github.com/Fundamentals-Sarah/HW5Repo1/raw/refs/heads/main/fsi-2019.csv"
Year15 <- "https://github.com/Fundamentals-Sarah/HW5Repo1/raw/refs/heads/main/fsi-2020.csv"
Year16 <- "https://github.com/Fundamentals-Sarah/HW5Repo1/raw/refs/heads/main/fsi-2021.csv"
Year17 <- "https://github.com/Fundamentals-Sarah/HW5Repo1/raw/refs/heads/main/fsi-2022-download.csv"
Year18 <- "https://github.com/Fundamentals-Sarah/HW5Repo1/raw/refs/heads/main/FSI-2023-DOWNLOAD%20(3).csv"

# I read each yearly file into its own data frame with rio::import(). Calling
# the function through the rio:: prefix means the package does not have to be
# attached with library() first.
YearOne <- rio::import(Year1)
YearTwo <- rio::import(Year2)
YearThree <- rio::import(Year3)
YearFour <- rio::import(Year4)
YearFive <- rio::import(Year5)
YearSix <- rio::import(Year6)
YearSeven <- rio::import(Year7)
YearEight <- rio::import(Year8)
YearNine <- rio::import(Year9)
YearTen <- rio::import(Year10)
YearEleven <- rio::import(Year11)
YearTwelve <- rio::import(Year12)
YearThirteen <- rio::import(Year13)
YearFourteen <- rio::import(Year14)
YearFifteen <- rio::import(Year15)
YearSixteen <- rio::import(Year16)
YearSeventeen <- rio::import(Year17)
YearEighteen <- rio::import(Year18)
# In the Environment tab, several yearly data frames show far more observations
# than the others. I look at the first rows of YearEight to see what is going
# on; they appear normal.
head(YearEight)
# The last rows of YearEight, however, are filled with NA. I want to trim these
# rows out of every data frame that contains them.
tail(YearEight)
# For each affected year I keep only the leading rows that hold country
# information, then confirm the result with tail().
# NOTE(review): the row counts (178, and 179 for YearSeventeen) are hard-coded
# from manual inspection; re-check them if the source files ever change.
YearEight_noNA <- YearEight[seq_len(178), ]
tail(YearEight_noNA)

YearTen_noNA <- YearTen[seq_len(178), ]
tail(YearTen_noNA)

YearTwelve_noNA <- YearTwelve[seq_len(178), ]
tail(YearTwelve_noNA)

YearThirteen_noNA <- YearThirteen[seq_len(178), ]
tail(YearThirteen_noNA)

YearFourteen_noNA <- YearFourteen[seq_len(178), ]
tail(YearFourteen_noNA)

YearFifteen_noNA <- YearFifteen[seq_len(178), ]
tail(YearFifteen_noNA)

# YearSeventeen keeps 179 rows rather than 178.
YearSeventeen_noNA <- YearSeventeen[seq_len(179), ]
tail(YearSeventeen_noNA)
# In the Environment tab, Years 14, 15, and 17 include columns I do not want.
# I start with Year 14 and print its column names with their positions.
colnames(YearFourteen_noNA)
##  [1] "Country"                          "Year"                            
##  [3] "Rank"                             "Total"                           
##  [5] "C1: Security Apparatus"           "C2: Factionalized Elites"        
##  [7] "C3: Group Grievance"              "E1: Economy"                     
##  [9] "E2: Economic Inequality"          "E3: Human Flight and Brain Drain"
## [11] "P1: State Legitimacy"             "P2: Public Services"             
## [13] "P3: Human Rights"                 "S1: Demographic Pressures"       
## [15] "S2: Refugees and IDPs"            "X1: External Intervention"       
## [17] "Change from Previous Year"
# I build a new version of Year Fourteen that keeps columns 1 through 16 and
# so drops column 17, "Change from Previous Year".
YearFourteen_new <- YearFourteen_noNA[, 1:16]
# I check that Year Fourteen now has only the columns I want.
colnames(YearFourteen_new)
##  [1] "Country"                          "Year"                            
##  [3] "Rank"                             "Total"                           
##  [5] "C1: Security Apparatus"           "C2: Factionalized Elites"        
##  [7] "C3: Group Grievance"              "E1: Economy"                     
##  [9] "E2: Economic Inequality"          "E3: Human Flight and Brain Drain"
## [11] "P1: State Legitimacy"             "P2: Public Services"             
## [13] "P3: Human Rights"                 "S1: Demographic Pressures"       
## [15] "S2: Refugees and IDPs"            "X1: External Intervention"
# I print the column names of Year 15 with their positions.
colnames(YearFifteen_noNA)
##  [1] "Country"                          "Year"                            
##  [3] "Rank"                             "Total"                           
##  [5] "C1: Security Apparatus"           "C2: Factionalized Elites"        
##  [7] "C3: Group Grievance"              "E1: Economy"                     
##  [9] "E2: Economic Inequality"          "E3: Human Flight and Brain Drain"
## [11] "P1: State Legitimacy"             "P2: Public Services"             
## [13] "P3: Human Rights"                 "S1: Demographic Pressures"       
## [15] "S2: Refugees and IDPs"            "X1: External Intervention"       
## [17] "Change from Previous Year"        "V18"                             
## [19] "V19"                              "V20"                             
## [21] "V21"                              "V22"                             
## [23] "V23"                              "V24"                             
## [25] "V25"                              "V26"                             
## [27] "V27"                              "V28"                             
## [29] "V29"                              "V30"                             
## [31] "V31"                              "V32"                             
## [33] "V33"                              "V34"                             
## [35] "V35"                              "V36"                             
## [37] "V37"                              "V38"                             
## [39] "V39"
# I build a new version of Year Fifteen that keeps columns 1 through 16, which
# drops "Change from Previous Year" and the V18-V39 columns.
YearFifteen_new <- YearFifteen_noNA[, 1:16]
# I check that Year Fifteen now has only the columns I want.
colnames(YearFifteen_new)
##  [1] "Country"                          "Year"                            
##  [3] "Rank"                             "Total"                           
##  [5] "C1: Security Apparatus"           "C2: Factionalized Elites"        
##  [7] "C3: Group Grievance"              "E1: Economy"                     
##  [9] "E2: Economic Inequality"          "E3: Human Flight and Brain Drain"
## [11] "P1: State Legitimacy"             "P2: Public Services"             
## [13] "P3: Human Rights"                 "S1: Demographic Pressures"       
## [15] "S2: Refugees and IDPs"            "X1: External Intervention"
# I print the column names of Year 17 with their positions.
colnames(YearSeventeen_noNA)
##  [1] "Country"                          "Year"                            
##  [3] "Rank"                             "Total"                           
##  [5] "C1: Security Apparatus"           "C2: Factionalized Elites"        
##  [7] "C3: Group Grievance"              "E1: Economy"                     
##  [9] "E2: Economic Inequality"          "E3: Human Flight and Brain Drain"
## [11] "P1: State Legitimacy"             "P2: Public Services"             
## [13] "P3: Human Rights"                 "S1: Demographic Pressures"       
## [15] "S2: Refugees and IDPs"            "X1: External Intervention"       
## [17] "V17"                              "V18"                             
## [19] "V19"
# I build a new version of Year Seventeen that keeps columns 1 through 16,
# which drops the V17-V19 columns.
YearSeventeen_new <- YearSeventeen_noNA[, 1:16]
# I check that Year Seventeen now has only the columns I want.
colnames(YearSeventeen_new)
##  [1] "Country"                          "Year"                            
##  [3] "Rank"                             "Total"                           
##  [5] "C1: Security Apparatus"           "C2: Factionalized Elites"        
##  [7] "C3: Group Grievance"              "E1: Economy"                     
##  [9] "E2: Economic Inequality"          "E3: Human Flight and Brain Drain"
## [11] "P1: State Legitimacy"             "P2: Public Services"             
## [13] "P3: Human Rights"                 "S1: Demographic Pressures"       
## [15] "S2: Refugees and IDPs"            "X1: External Intervention"
# My first attempt to combine the data failed with an error that column names
# do not match across all sheets. The cause is that YearEight_noNA has extra
# periods in its column names, so I overwrite them with the 16 names used by
# the other years.
names(YearEight_noNA) <- c(
  "Country",
  "Year",
  "Rank",
  "Total",
  "C1: Security Apparatus",
  "C2: Factionalized Elites",
  "C3: Group Grievance",
  "E1: Economy",
  "E2: Economic Inequality",
  "E3: Human Flight and Brain Drain",
  "P1: State Legitimacy",
  "P2: Public Services",
  "P3: Human Rights",
  "S1: Demographic Pressures",
  "S2: Refugees and IDPs",
  "X1: External Intervention"
)
# I check the corrected names.
names(YearEight_noNA)
##  [1] "Country"                          "Year"                            
##  [3] "Rank"                             "Total"                           
##  [5] "C1: Security Apparatus"           "C2: Factionalized Elites"        
##  [7] "C3: Group Grievance"              "E1: Economy"                     
##  [9] "E2: Economic Inequality"          "E3: Human Flight and Brain Drain"
## [11] "P1: State Legitimacy"             "P2: Public Services"             
## [13] "P3: Human Rights"                 "S1: Demographic Pressures"       
## [15] "S2: Refugees and IDPs"            "X1: External Intervention"
# With blank rows removed and the columns matching across every year, I stack
# all eighteen yearly data frames vertically, using the cleaned version of a
# year wherever one exists.
YearsCombined <- do.call(
  rbind,
  list(
    YearOne, YearTwo, YearThree, YearFour, YearFive, YearSix,
    YearSeven, YearEight_noNA, YearNine, YearTen_noNA, YearEleven,
    YearTwelve_noNA, YearThirteen_noNA, YearFourteen_new, YearFifteen_new,
    YearSixteen, YearSeventeen_new, YearEighteen
  )
)
# I review the structure of YearsCombined: the number of observations and
# variables, and the type of each column.
str(YearsCombined)
## 'data.frame':    3170 obs. of  16 variables:
##  $ Country                         : chr  "Sudan" "Congo Democratic Republic" "Cote d'Ivoire" "Iraq" ...
##  $ Year                            : int  2006 2006 2006 2006 2006 2006 2006 2006 2006 2006 ...
##  $ Rank                            : chr  "1st" "2nd" "3rd" "4th" ...
##  $ Total                           : num  112 110 109 109 109 ...
##  $ C1: Security Apparatus          : num  9.8 9.8 9.8 9.8 9.4 9.4 10 9.4 9.1 8.2 ...
##  $ C2: Factionalized Elites        : num  9.1 9.6 9.8 9.7 8.5 9.5 9.8 9.6 9.1 8 ...
##  $ C3: Group Grievance             : num  9.7 9.1 9.8 9.8 8.5 8.5 8 8.8 8.6 9.1 ...
##  $ E1: Economy                     : num  7.5 8.1 9 8.2 9.8 7.9 8.5 8.4 7 7.5 ...
##  $ E2: Economic Inequality         : num  9.2 9 8 8.7 9.2 9 7.5 8.3 8.9 8 ...
##  $ E3: Human Flight and Brain Drain: num  9.1 8 8.5 9.1 9 8 7 8 8.1 7 ...
##  $ P1: State Legitimacy            : num  9.5 9 10 8.5 8.9 9.5 10 9.4 8.5 8.3 ...
##  $ P2: Public Services             : num  9.5 9 8.5 8.3 9.5 9 10 9.3 7.5 8 ...
##  $ P3: Human Rights                : num  9.8 9.5 9.4 9.7 9.5 9.1 9.5 9.6 8.5 8.2 ...
##  $ S1: Demographic Pressures       : num  9.6 9.5 8.8 8.9 9.7 9 9 8.8 9.3 7.9 ...
##  $ S2: Refugees and IDPs           : num  9.7 9.5 7.6 8.3 8.9 9 8.1 5 9.3 9.6 ...
##  $ X1: External Intervention       : num  9.8 10 10 10 8 8 8.5 10 9.2 10 ...
# Rank is stored as character (values such as "1st", "2nd"), not integer. I
# strip every character that is not a digit or a hyphen and convert what is
# left to integer. The Rank values are piped into gsub() as its `x` argument,
# because `pattern` and `replacement` are supplied by name.
YearsCombined[["Rank"]] <- YearsCombined[["Rank"]] |>
  gsub(pattern = "[^0-9-]", replacement = "") |>
  as.integer()
# I check that the change applied correctly and that Year is still an integer.
str(YearsCombined)
## 'data.frame':    3170 obs. of  16 variables:
##  $ Country                         : chr  "Sudan" "Congo Democratic Republic" "Cote d'Ivoire" "Iraq" ...
##  $ Year                            : int  2006 2006 2006 2006 2006 2006 2006 2006 2006 2006 ...
##  $ Rank                            : int  1 2 3 4 5 6 6 8 9 10 ...
##  $ Total                           : num  112 110 109 109 109 ...
##  $ C1: Security Apparatus          : num  9.8 9.8 9.8 9.8 9.4 9.4 10 9.4 9.1 8.2 ...
##  $ C2: Factionalized Elites        : num  9.1 9.6 9.8 9.7 8.5 9.5 9.8 9.6 9.1 8 ...
##  $ C3: Group Grievance             : num  9.7 9.1 9.8 9.8 8.5 8.5 8 8.8 8.6 9.1 ...
##  $ E1: Economy                     : num  7.5 8.1 9 8.2 9.8 7.9 8.5 8.4 7 7.5 ...
##  $ E2: Economic Inequality         : num  9.2 9 8 8.7 9.2 9 7.5 8.3 8.9 8 ...
##  $ E3: Human Flight and Brain Drain: num  9.1 8 8.5 9.1 9 8 7 8 8.1 7 ...
##  $ P1: State Legitimacy            : num  9.5 9 10 8.5 8.9 9.5 10 9.4 8.5 8.3 ...
##  $ P2: Public Services             : num  9.5 9 8.5 8.3 9.5 9 10 9.3 7.5 8 ...
##  $ P3: Human Rights                : num  9.8 9.5 9.4 9.7 9.5 9.1 9.5 9.6 8.5 8.2 ...
##  $ S1: Demographic Pressures       : num  9.6 9.5 8.8 8.9 9.7 9 9 8.8 9.3 7.9 ...
##  $ S2: Refugees and IDPs           : num  9.7 9.5 7.6 8.3 8.9 9 8.1 5 9.3 9.6 ...
##  $ X1: External Intervention       : num  9.8 10 10 10 8 8 8.5 10 9.2 10 ...
# I write the fully cleaned dataset to disk in RDS format.
saveRDS(YearsCombined, file = "YearsCombined.RDS")
# I read the file back under a new name and inspect its structure to confirm
# that the RDS round trip worked.
YearsCombinedRDS <- readRDS(file = "YearsCombined.RDS")
str(YearsCombinedRDS)
## 'data.frame':    3170 obs. of  16 variables:
##  $ Country                         : chr  "Sudan" "Congo Democratic Republic" "Cote d'Ivoire" "Iraq" ...
##  $ Year                            : int  2006 2006 2006 2006 2006 2006 2006 2006 2006 2006 ...
##  $ Rank                            : int  1 2 3 4 5 6 6 8 9 10 ...
##  $ Total                           : num  112 110 109 109 109 ...
##  $ C1: Security Apparatus          : num  9.8 9.8 9.8 9.8 9.4 9.4 10 9.4 9.1 8.2 ...
##  $ C2: Factionalized Elites        : num  9.1 9.6 9.8 9.7 8.5 9.5 9.8 9.6 9.1 8 ...
##  $ C3: Group Grievance             : num  9.7 9.1 9.8 9.8 8.5 8.5 8 8.8 8.6 9.1 ...
##  $ E1: Economy                     : num  7.5 8.1 9 8.2 9.8 7.9 8.5 8.4 7 7.5 ...
##  $ E2: Economic Inequality         : num  9.2 9 8 8.7 9.2 9 7.5 8.3 8.9 8 ...
##  $ E3: Human Flight and Brain Drain: num  9.1 8 8.5 9.1 9 8 7 8 8.1 7 ...
##  $ P1: State Legitimacy            : num  9.5 9 10 8.5 8.9 9.5 10 9.4 8.5 8.3 ...
##  $ P2: Public Services             : num  9.5 9 8.5 8.3 9.5 9 10 9.3 7.5 8 ...
##  $ P3: Human Rights                : num  9.8 9.5 9.4 9.7 9.5 9.1 9.5 9.6 8.5 8.2 ...
##  $ S1: Demographic Pressures       : num  9.6 9.5 8.8 8.9 9.7 9 9 8.8 9.3 7.9 ...
##  $ S2: Refugees and IDPs           : num  9.7 9.5 7.6 8.3 8.9 9 8.1 5 9.3 9.6 ...
##  $ X1: External Intervention       : num  9.8 10 10 10 8 8 8.5 10 9.2 10 ...
# I save my cleaned file as a csv, without the row-name column.
write.csv(YearsCombined, "YearsCombined.csv", row.names = FALSE)
# I read the csv back to check that it can be read in correctly.
# check.names = FALSE keeps the original column names (for example
# "C1: Security Apparatus"); with the default check.names = TRUE, read.csv()
# rewrites them into syntactic names such as "C1..Security.Apparatus".
# NOTE: the str() output recorded below was captured with the default
# check.names = TRUE, which is why it shows the dotted names.
YearsCombinedCSV <- read.csv("YearsCombined.csv", check.names = FALSE)
str(YearsCombinedCSV)
## 'data.frame':    3170 obs. of  16 variables:
##  $ Country                         : chr  "Sudan" "Congo Democratic Republic" "Cote d'Ivoire" "Iraq" ...
##  $ Year                            : int  2006 2006 2006 2006 2006 2006 2006 2006 2006 2006 ...
##  $ Rank                            : int  1 2 3 4 5 6 6 8 9 10 ...
##  $ Total                           : num  112 110 109 109 109 ...
##  $ C1..Security.Apparatus          : num  9.8 9.8 9.8 9.8 9.4 9.4 10 9.4 9.1 8.2 ...
##  $ C2..Factionalized.Elites        : num  9.1 9.6 9.8 9.7 8.5 9.5 9.8 9.6 9.1 8 ...
##  $ C3..Group.Grievance             : num  9.7 9.1 9.8 9.8 8.5 8.5 8 8.8 8.6 9.1 ...
##  $ E1..Economy                     : num  7.5 8.1 9 8.2 9.8 7.9 8.5 8.4 7 7.5 ...
##  $ E2..Economic.Inequality         : num  9.2 9 8 8.7 9.2 9 7.5 8.3 8.9 8 ...
##  $ E3..Human.Flight.and.Brain.Drain: num  9.1 8 8.5 9.1 9 8 7 8 8.1 7 ...
##  $ P1..State.Legitimacy            : num  9.5 9 10 8.5 8.9 9.5 10 9.4 8.5 8.3 ...
##  $ P2..Public.Services             : num  9.5 9 8.5 8.3 9.5 9 10 9.3 7.5 8 ...
##  $ P3..Human.Rights                : num  9.8 9.5 9.4 9.7 9.5 9.1 9.5 9.6 8.5 8.2 ...
##  $ S1..Demographic.Pressures       : num  9.6 9.5 8.8 8.9 9.7 9 9 8.8 9.3 7.9 ...
##  $ S2..Refugees.and.IDPs           : num  9.7 9.5 7.6 8.3 8.9 9 8.1 5 9.3 9.6 ...
##  $ X1..External.Intervention       : num  9.8 10 10 10 8 8 8.5 10 9.2 10 ...
# I create a subset of YearsCombined for the boxplot, keeping only columns
# 1, 2 and 4 (Country, Year and Total) and dropping everything else.
Combined_subset <- YearsCombined[, c(1, 2, 4)]
# I check that the right columns are still included.
str(Combined_subset)
## 'data.frame':    3170 obs. of  3 variables:
##  $ Country: chr  "Sudan" "Congo Democratic Republic" "Cote d'Ivoire" "Iraq" ...
##  $ Year   : int  2006 2006 2006 2006 2006 2006 2006 2006 2006 2006 ...
##  $ Total  : num  112 110 109 109 109 ...
# I convert Year from an integer to a factor variable.
Combined_subset[["Year"]] <- factor(Combined_subset[["Year"]])
# I check that Year is now a factor.
str(Combined_subset)
## 'data.frame':    3170 obs. of  3 variables:
##  $ Country: chr  "Sudan" "Congo Democratic Republic" "Cote d'Ivoire" "Iraq" ...
##  $ Year   : Factor w/ 18 levels "2006","2007",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Total  : num  112 110 109 109 109 ...
# I install and load ggplot2. When I went to Knit, I kept getting an error; I
# consulted Google Colab, which is how I got the code below that sets the CRAN
# mirror explicitly.
options(repos = c(CRAN = "https://cloud.r-project.org"))
# I install ggplot2 only when it is not already available, so the package is
# not downloaded and reinstalled every time the script is run or knitted.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
# (The output below was recorded on a run where ggplot2 was installed.)
## Installing package into 'C:/Users/sarah/AppData/Local/R/win-library/4.5'
## (as 'lib' is unspecified)
## package 'ggplot2' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\sarah\AppData\Local\Temp\RtmpIhazAP\downloaded_packages
# I load the ggplot2 library.
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.2
# I create the base plot object and tell ggplot which data to use.
base <- ggplot(data = Combined_subset)
# I add the boxplot geom, with Year on the x axis and Total on the y axis.
# Left as a bare top-level expression so the plot is printed.
base + geom_boxplot(mapping = aes(x = Year, y = Total))

# I define the title, subtitle, and caption text for my boxplot.
theTitle <- "Fragility Index Total Per Year"
theSubtitle <- "All countries, 2006-2023"
theCaption <- "Source:Fragile States Index, The Fund for Peace"

# I store the boxplot in `box`, then print it with the title, subtitle,
# caption, and x-axis label attached.
box <- base + geom_boxplot(mapping = aes(x = Year, y = Total))
box +
  labs(
    title = theTitle,
    subtitle = theSubtitle,
    caption = theCaption,
    x = "Year"
  )

# I stack the 2013 data (YearEight_noNA) on top of the 2023 data
# (YearEighteen).
TwoYears <- rbind(YearEight_noNA, YearEighteen)
# I check the column names of the combined data.
colnames(TwoYears)
##  [1] "Country"                          "Year"                            
##  [3] "Rank"                             "Total"                           
##  [5] "C1: Security Apparatus"           "C2: Factionalized Elites"        
##  [7] "C3: Group Grievance"              "E1: Economy"                     
##  [9] "E2: Economic Inequality"          "E3: Human Flight and Brain Drain"
## [11] "P1: State Legitimacy"             "P2: Public Services"             
## [13] "P3: Human Rights"                 "S1: Demographic Pressures"       
## [15] "S2: Refugees and IDPs"            "X1: External Intervention"
# I keep only columns 1, 2, 5, 6 and 7: Country, Year, and the three C
# indicators (C1, C2, C3).
TwoYears_subset <- TwoYears[, c(1, 2, 5, 6, 7)]
# I rename all five columns in one step so they are easier to work with.
names(TwoYears_subset) <- c(
  "Country", "Year", "Security", "Elites", "Grievance"
)
# I examine my subset of 2013 and 2023 data.
str(TwoYears_subset)
## 'data.frame':    357 obs. of  5 variables:
##  $ Country  : chr  "Somalia" "Congo Democratic Republic" "Sudan" "South Sudan" ...
##  $ Year     : int  2013 2013 2013 2013 2013 2013 2013 2013 2013 2013 ...
##  $ Security : num  9.7 10 9.8 9.6 9.4 9.8 9.9 7.9 9.7 8.4 ...
##  $ Elites   : num  10 9.5 10 9.8 9.5 9.5 9.4 9 9.1 9.7 ...
##  $ Grievance: num  9.3 9.4 10 10 8.8 9 9.2 7 8.5 8.4 ...
# I convert the Year variable from integer to factor.
TwoYears_subset[["Year"]] <- factor(TwoYears_subset[["Year"]])
# I check the result.
str(TwoYears_subset)
## 'data.frame':    357 obs. of  5 variables:
##  $ Country  : chr  "Somalia" "Congo Democratic Republic" "Sudan" "South Sudan" ...
##  $ Year     : Factor w/ 2 levels "2013","2023": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Security : num  9.7 10 9.8 9.6 9.4 9.8 9.9 7.9 9.7 8.4 ...
##  $ Elites   : num  10 9.5 10 9.8 9.5 9.5 9.4 9 9.1 9.7 ...
##  $ Grievance: num  9.3 9.4 10 10 8.8 9 9.2 7 8.5 8.4 ...
# I reshape my two years of data into the LONG format: the Security, Elites,
# and Grievance columns are stacked, with the column name stored in `var` and
# the value stored in `index`.
TwoYears_long <- tidyr::pivot_longer(
  data = TwoYears_subset,
  cols = c(Security, Elites, Grievance),
  names_to = "var",
  values_to = "index"
)
# I print the reshaped data.
TwoYears_long
#I tried to generate the histograms for C1, C2, and C3 for the 2013 and 2023 data, but could not: I receive an error that object "ffi_list2" is not found. Therefore, I am exporting my file as an RDS and a CSV and showing the code I tried below.
#base2 = ggplot(data=TwoYears_long)
#base2 + geom_histogram(aes(x=index),bins=10) +
                   #facet_wrap(var ~ Year)
# I write the long-format dataset to disk in RDS format.
saveRDS(TwoYears_long, file = "TwoYearsLong.RDS")
# I read the file back under a new name and inspect its structure to confirm
# that the RDS round trip worked.
TwoYearsLongRDS <- readRDS(file = "TwoYearsLong.RDS")
str(TwoYearsLongRDS)
## tibble [1,071 × 4] (S3: tbl_df/tbl/data.frame)
##  $ Country: chr [1:1071] "Somalia" "Somalia" "Somalia" "Congo Democratic Republic" ...
##  $ Year   : Factor w/ 2 levels "2013","2023": 1 1 1 1 1 1 1 1 1 1 ...
##  $ var    : chr [1:1071] "Security" "Elites" "Grievance" "Security" ...
##  $ index  : num [1:1071] 9.7 10 9.3 10 9.5 9.4 9.8 10 10 9.6 ...
# I also save the long-format data as a csv, without the row-name column.
write.csv(TwoYears_long, file = "TwoYearsLong.csv", row.names = FALSE)
# I read the csv back in and check that it can be read correctly.
TwoYearsLongCSV <- read.csv(file = "TwoYearsLong.csv")
str(TwoYearsLongCSV)
## 'data.frame':    1071 obs. of  4 variables:
##  $ Country: chr  "Somalia" "Somalia" "Somalia" "Congo Democratic Republic" ...
##  $ Year   : int  2013 2013 2013 2013 2013 2013 2013 2013 2013 2013 ...
##  $ var    : chr  "Security" "Elites" "Grievance" "Security" ...
##  $ index  : num  9.7 10 9.3 10 9.5 9.4 9.8 10 10 9.6 ...
# I also try limiting the long data to a single year: the rows where Year is
# 2013, keeping only the Country, var, and index columns.
long_2013 <- TwoYears_long[
  TwoYears_long[["Year"]] == 2013,
  c("Country", "var", "index")
]

long_2013
#facets_free_histo, eval=TRUE, echo=TRUE, message=FALSE}
#I tried again using only the 2013 data, but this also did not work; I get the same error that object "ffi_list2" is not found.
#base3 = ggplot(data=long_2013)
#base3 + geom_histogram(aes(x=index),bins=10) +
                        #facet_wrap(var ~ Year)