See the censusapi package for an easier method. For replication purposes, the original code used to fetch the census data is included here:
i <- "TX"

# Build tract GEOIDs by zero-padding and concatenating the numeric codes:
# 2-digit state + 3-digit county + 6-digit tract.
# sprintf() pads explicitly, so the result does not depend on how R happens
# to print large numbers (as.character(x + 1000000) is sensitive to scipen).
make.tract.id <- function( state, county, tract )
{
  sprintf( "%02d%03d%06d",
           as.integer( state ),
           as.integer( county ),
           as.integer( tract ) )
}

# Fetch one ACS variable for every tract in a state.
#
#   state.abb  state to query (passed to geo.make)
#   variable   ACS variable code, e.g. "B01003_001"
#   col.name   name given to the estimate column in the result
#   endyear    ACS end year (defaults to the 2010 release used throughout)
#
# Returns a two-column character matrix ( tract.id, <col.name> ) whose row
# names are carried over from the ACS estimate matrix.
fetch.tract.var <- function( state.abb, variable, col.name, endyear=2010 )
{
  acs.dat <- acs.fetch( endyear=endyear,
                        geo=geo.make( state=state.abb, county="*", tract="*" ),
                        variable=variable )
  geo <- acs.dat@geography
  tract.id <- make.tract.id( geo$state, geo$county, geo$tract )
  out <- cbind( tract.id, acs.dat@estimate[ , variable ] )
  colnames( out ) <- c( "tract.id", col.name )
  out
}

# median age - "B01002A_001"
# NOTE(review): the "A" suffix normally marks the white-alone iteration of an
# ACS table; the all-population median age would be "B01002_001". Left
# unchanged so the published numbers replicate -- confirm before reusing.
dat.age <- fetch.tract.var( i, "B01002A_001", "age" )

# hispanic - "B03003_003"
dat.hisp <- fetch.tract.var( i, "B03003_003", "hispanic" )

# poverty - "B17001_001" # baseline pop for which poverty is measured
# poverty - "B17001_002"
dat.pov1 <- fetch.tract.var( i, "B17001_001", "any.income" )
dat.pov2 <- fetch.tract.var( i, "B17001_002", "inc.below" )

# merge() joins on the shared tract.id column and returns a data frame;
# the estimates arrive as text, so convert them back to numbers.
dat.pov <- merge( dat.pov1, dat.pov2 )
dat.pov$inc.below <- as.numeric( as.character( dat.pov$inc.below ) )
dat.pov$any.income <- as.numeric( as.character( dat.pov$any.income ) )
dat.pov$pov.rate <- dat.pov$inc.below / dat.pov$any.income
# A zero denominator gives 0/0 = NaN; the original analysis records those
# tracts as a 0 poverty rate, which is preserved here.
dat.pov$pov.rate[ dat.pov$any.income == 0 ] <- 0

# median household income - "B19013_001"
dat.inc <- fetch.tract.var( i, "B19013_001", "income" )

# race.white - "B02001_002"
dat.race.white <- fetch.tract.var( i, "B02001_002", "white" )

# race.black - "B02001_003"
dat.race.black <- fetch.tract.var( i, "B02001_003", "black" )

# total population - "B01003_001"
dat.pop <- fetch.tract.var( i, "B01003_001", "totalpop" )
# ---- Combine the per-variable tables into one tract-level data frame ----

# dat.pop arrives as a character matrix; its row names (carried over from the
# ACS estimates) are kept as a descriptive tract label.
dat.pop <- as.data.frame( dat.pop )
dat.pop$tract.name <- rownames( dat.pop )

# merge() joins on the columns both inputs share, which here is tract.id.
dat <- merge( dat.pop, dat.pov )
for( piece in list( dat.race.white, dat.race.black, dat.inc, dat.age, dat.hisp ) )
{
  dat <- merge( dat, piece )
}

# The estimates were bound next to the character tract.id, so they come back
# as text (or factors, depending on the R version); convert them to numbers.
for( v in c( "totalpop", "white", "black", "hispanic", "income", "age" ) )
{
  dat[[ v ]] <- as.numeric( as.character( dat[[ v ]] ) )
}

# Express the poverty rate and the group counts as percentages (one decimal).
dat$pov.rate <- round( 100*dat$pov.rate, 1 )
for( v in c( "white", "black", "hispanic" ) )
{
  dat[[ v ]] <- round( 100*( dat[[ v ]] / dat$totalpop ), 1 )
}

census2 <- dat[ c("tract.id","tract.name","totalpop","income",
                  "age","pov.rate","white","black","hispanic") ]
names( census2 ) <- c("geoid","geoname","totalpop","income",
                      "medianage","poverty","white","black","hispanic")
head( census2 )

# -666666666 appears in the fetched data where no estimate is available
# (presumably the Census "not computable" marker -- see the ACS annotation
# notes); treat it as missing.
census2$income[ census2$income %in% -666666666 ] <- NA
census2$medianage[ census2$medianage %in% -666666666 ] <- NA

# Tracts with no residents have no meaningful rates. The mask is computed
# once, before totalpop itself is blanked, so the order below does not matter.
# hispanic is included so it is NA like the other percentages rather than the
# NaN left behind by 0/0.
no.pop <- census2$totalpop %in% 0
for( v in c( "poverty", "white", "black", "hispanic", "totalpop" ) )
{
  census2[[ v ]][ no.pop ] <- NA
}
# Descriptive statistics for the analysis variables, rendered as an HTML
# table with values rounded to whole numbers.
summary.vars <- select( census2, medianage, income, totalpop, poverty,
                        hispanic, white, black )
stargazer( summary.vars, type = "html", digits=0 )
Statistic | N | Mean | St. Dev. | Min | Pctl(25) | Pctl(75) | Max |
medianage | 5,215 | 36 | 8 | 11 | 30 | 42 | 80 |
income | 5,209 | 52,713 | 28,259 | 6,140 | 33,831 | 63,284 | 250,001 |
totalpop | 5,224 | 4,654 | 2,241 | 24 | 3,100 | 5,826 | 25,073 |
poverty | 5,224 | 18 | 13 | 0 | 7 | 25 | 100 |
hispanic | 5,224 | 36 | 28 | 0 | 13 | 54 | 100 |
white | 5,224 | 72 | 20 | 0 | 63 | 87 | 100 |
black | 5,224 | 12 | 17 | 0 | 1 | 15 | 100 |
If you want to build a dataset from multiple states at once, you will need to use a loop because the Census API limits downloads at the tract level to one state at a time. It would look something like this:
### CREATE A LOOP TO GRAB DATA FOR ALL STATES AND STACK

# Build tract GEOIDs by zero-padding and concatenating the numeric codes:
# 2-digit state + 3-digit county + 6-digit tract.
# sprintf() pads explicitly, so the result does not depend on how R happens
# to print large numbers (as.character(x + 1000000) is sensitive to scipen).
make.tract.id <- function( state, county, tract )
{
  sprintf( "%02d%03d%06d",
           as.integer( state ),
           as.integer( county ),
           as.integer( tract ) )
}

# Fetch one ACS variable for every tract in a state.
#
#   state.abb  state to query (passed to geo.make)
#   variable   ACS variable code, e.g. "B01003_001"
#   col.name   name given to the estimate column in the result
#   endyear    ACS end year (defaults to the 2010 release used throughout)
#
# Returns a two-column character matrix ( tract.id, <col.name> ) whose row
# names are carried over from the ACS estimate matrix.
fetch.tract.var <- function( state.abb, variable, col.name, endyear=2010 )
{
  acs.dat <- acs.fetch( endyear=endyear,
                        geo=geo.make( state=state.abb, county="*", tract="*" ),
                        variable=variable )
  geo <- acs.dat@geography
  tract.id <- make.tract.id( geo$state, geo$county, geo$tract )
  out <- cbind( tract.id, acs.dat@estimate[ , variable ] )
  colnames( out ) <- c( "tract.id", col.name )
  out
}

state.list <- c("AL","AK","AR","AZ","CA","CO","CT","DE","FL","GA","HI","ID","IL",
                "IN","IA","KS","KY","LA","MD","MA","ME","MI","MN","MS","MO","MT",
                "NE","NV","NH","NJ","NM","NY","NC","ND","OH","OK","OR","PA","RI",
                "SC","SD","TN","TX","UT","VT","VA","WA","WI","WV","WY")

# hispanic: "B01001I_001"
# median age: "B01002A_001"

# County-level requests for Texas. The results are not stored; they are only
# displayed when the lines are run interactively.
acs.fetch( endyear=2010, geo=geo.make( state="TX", county="*" ), variable="B17001_001" )
acs.fetch( endyear=2010, geo=geo.make( state="TX", county="*" ), variable="B01003_001" )

# Output column name -> ACS variable code.
# "B17001_001" is the baseline population for which poverty is measured and
# "B17001_002" is the part of it below the poverty line, so the poverty
# column is taken from _002 and the baseline is kept as any.income to serve
# as the denominator of a poverty rate.
acs.vars <- c( any.income = "B17001_001",   # poverty universe
               poverty    = "B17001_002",   # count below poverty
               income     = "B19013_001",   # median household income
               white      = "B02001_002",   # count, white
               black      = "B02001_003",   # count, black
               totalpop   = "B01003_001" )  # total population

# One slot per state for each variable. The state tables are collected here
# and stacked once at the end instead of growing a matrix with rbind() on
# every pass through the loop.
empty.slots <- setNames( vector( "list", length( state.list ) ), state.list )
pieces <- lapply( acs.vars, function( x ) empty.slots )

# The Census API serves tract-level data one state at a time.
for( i in state.list )
{
  # i <- "AL"
  for( v in names( acs.vars ) )
  {
    pieces[[ v ]][[ i ]] <- fetch.tract.var( i, acs.vars[[ v ]], v )
  }
}

# Stack the states for each variable.
stacked <- lapply( pieces, function( x ) do.call( rbind, unname( x ) ) )

dat.pov <- merge( stacked$any.income, stacked$poverty )  # poverty universe + count below poverty
dat.inc <- stacked$income                                # median household income
dat.race.white <- stacked$white                          # count, white
dat.race.black <- stacked$black                          # count, black
dat.pop <- stacked$totalpop                              # total population

# merge() joins on the shared tract.id column.
dat <- merge( dat.pov, dat.inc )
dat <- merge( dat, dat.race.white )
dat <- merge( dat, dat.race.black )
dat <- merge( dat, dat.pop )
names( dat )