School quality and nearby housing values are inherently linked. While the direction of the causal relationship is unclear (whether changes in school quality cause changes in housing or vice versa), there is certainly a strong correlation between school quality and nearby housing values. This dataset of test averages from Syracuse City School District for years 2005, 2010, and 2015 allows us to begin exploring this relationship in the Syracuse area.
This dataset was provided by the Syracuse School District and contains the following information for 2005, 2010 and 2015:
Year, School Name, Test Type (ELA, MATH, Science, Social Studies and Various NY State Regents Tests), and Average Test Score. ELA and Math Score are out of 800 points and the other test types are out of 100 points.
The below code will first standardize all test scores to a 0-100 percent scale. Then the data will be wrangled by averaging all averaged tests scores across schools for 2005, 2010, 2015. This will produce an average test score per school per year. Then, the averaged score for an individual school is averaged with a school in the same census to produce an averaged score for each census tract for years 2005, 2010 and 2015.
In addition to the above wrangling, all schools are geocoded and then spatial joined with Syracuse census tract files so school test scores can be analyzed and mapped at the census tract level.
Lastly, some descriptive statistics, mapping and graphs are provided below the data wrangling.
#load in data from school district with test scores
#setwd("/Users/beelerenator/Documents/Graduate School/MPA Syracuse/DDMII/DDMII Project/BUILD/Schools")
setwd("..")
setwd("..")
setwd("./DATA/RAW_DATA")
school.tests <- read.csv( "schooldata2005_2010_2015.csv", stringsAsFactors=FALSE )
school.tests <- na.omit(school.tests)
Take out non-schools from school variable & Make binary variable if school is open or closed
#clean data -- take out non-schools from school variable
school.tests <- filter(school.tests, Accountability.School != "24" & Accountability.School != "34" & Accountability.School != "40", Accountability.School != "9", Accountability.School != "28", Accountability.School != "363", Accountability.School != "GED 16-17", Accountability.School != "GED 18+", Accountability.School != "3" )
#make binary variable if school is open or closed
open.schools <- school.tests$Accountability.School!="Elmwood Elementary School - CLOSED" & school.tests$Accountability.School!="Levy Middle School - CLOSED" &
school.tests$Accountability.School!="Bellevue Middle School Academy at Shea- CLOSED" & school.tests$Accountability.School!="Blodgett Middle School - CLOSED" & school.tests$Accountability.School!="Westside Academy @ Blodgett Elementary- CLOSED"
open.schools <- data.frame(open.schools)
school.tests <- cbind(school.tests, open.schools)
After Geocoding Schools:
#Geocode schools
#addresses.to.geocode <- paste( school.tests$Accountability.School, "Syracuse, NY", sep=", " )
#school.lat.long <- geocode( addresses.to.geocode )
#Save lat.long list to .csv file
#write.csv(school.lat.long, file = "school.lat.lon.csv")
setwd("..")
setwd("..")
setwd("./DATA/RAW_DATA")
school.lat.long <- read.csv("syracuse_school.lat.lon.csv", stringsAsFactors=FALSE )
school.lat.long <- school.lat.long[ c("lon","lat")]
#add lat.long list to school dataframe
school.tests <- cbind(school.tests, school.lat.long)
#standardizing all test score to percents across all schools
#ELA and Math tests are out of 800. All of the other test types are out of 100
school.tests$percent.score <- ifelse(school.tests$Test_Code=="ELA" | school.tests$Test_Code=="MATH", (school.tests$mean/800), school.tests$mean/100)
All test scores are averaged for each school in 2005, 2010, 2015
#average test score for a given school in a given year
school.tests.grouped <- group_by(school.tests, Test.Year, Accountability.School)
school.test.avgd <- summarise(school.tests.grouped,
Score=mean(percent.score),
lon=unique(lon),
lat=unique(lat),
school.open=unique(open.schools))
school.test.avgd <- data.frame(school.test.avgd)
#load in census tracts
setwd("..")
setwd("..")
setwd("./SHAPEFILES")
census.syr <- geojson_read("SYRCensusTracts.geojson", method="local", what="sp")
census.syr <- spTransform(census.syr, CRS( "+proj=longlat +datum=WGS84"))
#turn shapefile into data frame
dat.census.syr <- data.frame(census.syr)
#turn lat.long into spatial points and match it with onongoda map
school.lat.long2 <- dplyr::select(school.test.avgd, lon, lat)
school.lat.long2 <- SpatialPoints(school.lat.long2, proj4string=CRS("+proj=longlat +datum=WGS84") )
census.matched.to.points <- over(school.lat.long2, census.syr )
#bind census tracts to schools
school.w.census <- cbind( school.test.avgd, census.matched.to.points)
school.w.census <- na.omit(school.w.census)
#write csv for school point desgregated data
#setwd("..")
setwd("..")
setwd("./DATA/PROCESSED_DATA")
write.csv(school.w.census, file = "schools_processed.csv")
Wrangle so that there is an average score per census tract per year.
#average school averaged test score across census tracts across years
school.w.census.grouped <- group_by(school.w.census, Test.Year, TRACTCE10)
census.score.year <- summarise(school.w.census.grouped,
Avg.Score=mean(Score),
STATEFP10=unique(STATEFP10),
COUNTYFP10=unique(COUNTYFP10),
GEOID10=unique(GEOID10),
NAME10=unique(NAME10),
NAMELSAD10=unique(NAMELSAD10),
MTFCC10=unique(MTFCC10),
FUNCSTAT10=unique(FUNCSTAT10),
ALAND10=unique(ALAND10),
AWATER10=unique(AWATER10),
INTPTLAT10=unique(INTPTLAT10),
INTPTLON10=unique(INTPTLON10))
# Subset for merge and rename variables for merge #GEOID keep and rename TRACT
census.score.year.subset <- dplyr::select(census.score.year, Test.Year, GEOID10, Avg.Score)
census.score.year.subset <- plyr::rename(census.score.year.subset, c("Test.Year"="YEAR", "GEOID10" = "TRACT", "Avg.Score" = "SCHOOL_SCORE"))
#write csv to be uploaded to github
setwd("..")
setwd("..")
setwd("./DATA/AGGREGATED_DATA")
write.csv(census.score.year.subset, file = "schools_aggregated.csv")
Showing Test Performance for all Schools Between 2015, 2010, 2005
#bar graphs for all school data points
allschool.data.2015 <- filter(school.tests, Test.Year == 2015)
allschool.data.2010 <- filter(school.tests, Test.Year == 2010)
allschool.data.2005 <- filter(school.tests, Test.Year == 2005)
Group.allschool.test <-group_by(school.tests, Test.Year)
Avg.scoretests <- summarise(Group.allschool.test, Avg.score=mean(percent.score, na.rm=T))
Avg.scoretests$Avg.score <- Avg.scoretests$Avg.score*100
par(family="Georgia")
barplot(Avg.scoretests$Avg.score,
main="Average Test Score by Year",
xlab="Year", ylab = "Percent",
names.arg=Avg.scoretests$Test.Year, space=0,
col= c("dodgerblue4", "dodgerblue4", "dodgerblue4"), ylim=c(0,80)
)
text(0.4574257,74.19680, "71.68 %")
text(1.4781278,71.95029, "69.72 %")
text(2.4907291,56.97356, "53.99 %")
Averaged School Scores by Census Tracts 2015
ed.data.2015 <- filter(census.score.year, Test.Year == 2015)
ed.data.2010 <- filter(census.score.year, Test.Year == 2010)
ed.data.2005 <- filter(census.score.year, Test.Year == 2005)
#map of 2015 schools by census tract
color.function <- colorRampPalette( c("firebrick4","light gray","steel blue" ) )
col.ramp <- color.function( 5 ) # number of groups you desire
color.vector <- cut( rank(ed.data.2015$Avg.Score), breaks=5, labels=col.ramp )
color.vector <- as.character( color.vector )
this.order <- match( dat.census.syr$TRACTCE10, ed.data.2015$TRACTCE10 )
color.vec.ordered <- color.vector[ this.order ]
par(family="Georgia")
plot(census.syr, col=color.vec.ordered, main="Averaged Test Scores by Census Tracts 2015")
breaks.Score2015 <-classIntervals(ed.data.2015$Avg.Score, n=5, style="quantile")
breaks.Score2015$brks <- breaks.Score2015$brks*100
breaks.Score2015$brks <- round(breaks.Score2015$brks, 2)
breaks.Score2015$brks <- paste(breaks.Score2015$brks,"%")
legend( "bottomright", bg="white",
pch=19, pt.cex=1.5, cex=0.7,
legend=capitalize(leglabs(breaks.Score2015$brks)),
col=col.ramp,
box.col="white",
title="Averaged Test Scores", title.adj = .1, xjust=1
)
Syracuse Schools Color-Coded By Averaged School Score for 2015
#schools for 2015
school.data.2015 <- filter(school.w.census, Test.Year == 2015)
color.function <- colorRampPalette( c("firebrick4","light gray","steel blue" ) )
col.ramp.sc <- color.function( 5 ) # number of groups you desire
color.vector.sc <- cut( rank(school.data.2015$Score), breaks=5, labels=col.ramp.sc )
color.vector.sc <- as.character( color.vector.sc )
#this.order <- match( dat.census.syr$TRACTCE10, ed.data.2015$TRACTCE10 )
#color.vec.ordered <- color.vector[ this.order ]
par(family="Georgia")
plot(census.syr, main="Averaged Test Scores by Schools 2015")
points(school.data.2015$lon, school.data.2015$lat, col=color.vector.sc, pch=20, cex=3)
breaks.Sch.score2015 <-classIntervals(school.data.2015$Score, n=5, style="quantile")
breaks.Sch.score2015$brks <- breaks.Sch.score2015$brks*100
breaks.Sch.score2015$brks <- round(breaks.Sch.score2015$brks, 2)
breaks.Sch.score2015$brks <- paste(breaks.Sch.score2015$brks,"%")
legend( "bottomright", bg="white",
pch=19, pt.cex=1.5, cex=0.7,
legend=capitalize(leglabs(breaks.Sch.score2015$brks)),
col=col.ramp.sc,
box.col="white",
title="Averaged Test Scores", title.adj = .1, xjust=1
)
Distribution for Change in Test Scores by Schools from 2005-2010
school.w.census.grp <- group_by(school.w.census, Accountability.School)
school.diffs <- mutate(school.w.census.grp, Score_Diff = c(0, diff(Score)))
school.diffs.2010 <- filter(school.diffs, Test.Year == 2010 & Score_Diff != 0)
school.diffs.2010$Score_Diff <- school.diffs.2010$Score_Diff*100
par(family="Georgia")
hist(school.diffs.2010$Score_Diff,
main="Distribution for Change in Scores by Schools from 2005-2010 ",
xlab="Percentage Point Change of Averaged Test Scores", ylab="Number of Schools",
col="dodgerblue4",
las=1)
Distribution for Change in Test Scores by Schools from 2010-2015
school.diffs.2015 <- filter(school.diffs, Test.Year == 2015 & Score_Diff != 0)
school.diffs.2015$Score_Diff <- school.diffs.2015$Score_Diff*100
par(family="Georgia")
hist(school.diffs.2015$Score_Diff,
main="Distribution for Change in Scores by Schools from 2010-2015 ",
xlab="Percentage Point Change of Averaged Test Scores", ylab="Number of Schools",
col="dodgerblue4",
las=1)
Average Negative Percentage Point Change in Test Score for All Schools
school.diffs.2010.15 <- filter(school.diffs, Test.Year != 2005 & Score_Diff != 0)
Group.school.diffs.2010.15 <-group_by(school.diffs.2010.15 , Test.Year)
Avg.score.diff.schools <- summarise(Group.school.diffs.2010.15, Avg.Score_Diff=mean(Score_Diff, na.rm=T))
Avg.score.diff.schools$Avg.Score_Diff <-lapply(Avg.score.diff.schools$Avg.Score_Diff, abs)
Avg.score.diff.schools$Avg.Score_Diff <- as.numeric(Avg.score.diff.schools$Avg.Score_Diff)
Avg.score.diff.schools$Avg.Score_Diff <- Avg.score.diff.schools$Avg.Score_Diff*100
par(family="Georgia")
barplot(Avg.score.diff.schools$Avg.Score_Diff,
main="Average Negative Percentage Point Change in Test Scores",
xlab="Year", ylab = "Negative Percent Change ",
names.arg=Avg.score.diff.schools$Test.Year, space=0, ylim=c(0, 25),
col= c("dodgerblue4", "dodgerblue4")
)
text(0.4723672,7.681059, "-6.81 ")
text(1.476868, 23.120831, "-22.22 ")
Prepare Map of Schools by Change in Test Scores from 2010-2015
color.function <- colorRampPalette( c("firebrick4","light gray","steelblue" ) )
col.ramp.school.diff15 <- color.function( 5 ) # number of groups you desire
color.vector.school.diff15 <- cut(school.diffs.2015$Score_Diff, breaks= c(-37.30074, -11.24, -6.95, -4.25, -2.19, 5.052418), labels=col.ramp.school.diff15 )
color.vector.school.diff15 <- as.character( color.vector.school.diff15 )
Prepare Map of Schools by Change in Test Scores from 2005-2010
color.function <- colorRampPalette( c("firebrick4","light gray","steelblue" ) )
col.ramp.school.diff10 <- color.function( 5 ) # number of groups you desire
color.vector.school.diff10 <- cut( rank(school.diffs.2010$Score_Diff), breaks=5, labels=col.ramp.school.diff10 )
color.vector.school.diff10 <- as.character( color.vector.school.diff10 )
breaks.Score.diff2010 <-classIntervals(school.diffs.2010$Score_Diff, n=5, style="quantile")
breaks.Score.diff2010$brks <- round(breaks.Score.diff2010$brks, 2)
GIF for Change in Averaged Test Scores by School from 2005-2010 & 2010-2015
saveGIF({
{
par(family="Georgia")
plot(census.syr, main="Change in Averaged Test Scores by School from 2005 to 2010")
points(school.diffs.2010$lon, school.diffs.2010$lat, col=color.vector.school.diff10, pch=20, cex=3)
legend( "bottomright", bg="white",
pch=19, pt.cex=2, cex=1.3,
legend=capitalize(leglabs(breaks.Score.diff2010$brks)),
col=col.ramp.school.diff10,
box.col="white",
title="Change in Percentage Points 2010 ", title.adj = .1, xjust=1
)
par(family="Georgia")
plot(census.syr, main="Change in Averaged Test Scores by School from 2010 to 2015")
points(school.diffs.2015$lon, school.diffs.2015$lat, col=color.vector.school.diff15, pch=20, cex=3)
legend( "bottomright", bg="white",
pch=19, pt.cex=2, cex=1.3,
legend=capitalize(leglabs(breaks.Score.diff2010$brks)),
col=col.ramp.school.diff15,
box.col="white" ,
title="Change in Percentage Points 2015", title.adj = .1, xjust=1
)
}
},
movie.name = "movie_name5.gif", # name of your gif
interval = 1.5, # controls the animation speed
ani.width = 800, # size of the gif in pixels
ani.height = 800 ) # size of the git in pixels
## [1] TRUE