R Packages

Original Census Query (deprecated):

See the censusapi package for an easier method. For replication purposes, the original code used to fetch the census data is included here:

# State to query. Tract-level requests are made for one state at a time.
i <- "TX"


# Download one ACS (endyear 2010) tract-level variable for a state and return
# it as a data frame keyed by tract GEOID.
#
# Args:
#   state.abb: state passed to geo.make(), e.g. "TX".
#   variable:  ACS variable code, e.g. "B01003_001".
#   col.name:  name given to the estimate column in the result.
#   keep.name: if TRUE, also return the row names of the estimate matrix
#              as a `tract.name` column.
#
# Returns:
#   A data frame with a character `tract.id` column, a numeric column named
#   `col.name`, and optionally a character `tract.name` column.
fetch.tract.variable <- function( state.abb, variable, col.name, keep.name=FALSE )
{
  my.acs <- acs.fetch( endyear=2010,
                       geo=geo.make( state=state.abb, county="*", tract="*" ),
                       variable=variable )

  geo <- my.acs@geography

  # GEOID = 2-digit state + 3-digit county + 6-digit tract, zero-padded.
  # sprintf() pads explicitly, so the result does not depend on how
  # as.character() chooses to format a number.
  tract.id <- paste0( sprintf( "%02d", as.integer( geo$state ) ),
                      sprintf( "%03d", as.integer( geo$county ) ),
                      sprintf( "%06d", as.integer( geo$tract ) ) )

  out <- data.frame( tract.id=tract.id,
                     value=as.numeric( my.acs@estimate[ , variable ] ),
                     stringsAsFactors=FALSE )
  names( out )[ 2 ] <- col.name

  if( keep.name )
  {
    out$tract.name <- rownames( my.acs@estimate )
  }

  out
}


# median age  - "B01002A_001"
# NOTE(review): the "A" suffix on B01002A looks like a race-specific
# iteration of the median-age table; confirm whether B01002_001 was intended.
# Left unchanged so the original query can be replicated.
dat.age <- fetch.tract.variable( i, "B01002A_001", "age" )


# hispanic - "B03003_003"
dat.hisp <- fetch.tract.variable( i, "B03003_003", "hispanic" )


# poverty -  "B17001_001"  # baseline pop for which poverty is measured
# poverty -  "B17001_002"
dat.pov1 <- fetch.tract.variable( i, "B17001_001", "any.income" )
dat.pov2 <- fetch.tract.variable( i, "B17001_002", "inc.below" )

dat.pov <- merge( dat.pov1, dat.pov2, by="tract.id" )
dat.pov$pov.rate <- dat.pov$inc.below / dat.pov$any.income
# 0/0 yields NaN; tracts with an empty baseline population are coded as 0.
dat.pov$pov.rate[ dat.pov$any.income == 0 ] <- 0


# median household income - "B19013_001"
dat.inc <- fetch.tract.variable( i, "B19013_001", "income" )


# race.white - "B02001_002"
dat.race.white <- fetch.tract.variable( i, "B02001_002", "white" )


# race.black - "B02001_003"
dat.race.black <- fetch.tract.variable( i, "B02001_003", "black" )


# total population - "B01003_001"
# The tract name is carried along only on this table.
dat.pop <- fetch.tract.variable( i, "B01003_001", "totalpop", keep.name=TRUE )


# Combine everything into one row per tract, matching on the GEOID.
dat <- Reduce( function( left, right ) merge( left, right, by="tract.id" ),
               list( dat.pop, dat.pov, dat.race.white, dat.race.black,
                     dat.inc, dat.age, dat.hisp ) )


# Express the poverty rate and the race / ethnicity counts as percentages
# rounded to one decimal place.
dat$pov.rate <- round( 100*dat$pov.rate, 1 )
dat$white <- round( 100*(dat$white / dat$totalpop), 1 )
dat$black <- round( 100*(dat$black / dat$totalpop), 1 )
dat$hispanic <- round( 100*(dat$hispanic / dat$totalpop), 1 )

census2 <- dat[ c("tract.id","tract.name","totalpop","income",
                 "age","pov.rate","white","black","hispanic") ]

names( census2 ) <- c("geoid","geoname","totalpop","income",
                     "medianage","poverty","white","black","hispanic")

head( census2 )
Statistic N Mean St. Dev. Min Pctl(25) Pctl(75) Max
medianage 5,215 36 8 11 30 42 80
income 5,209 52,713 28,259 6,140 33,831 63,284 250,001
totalpop 5,224 4,654 2,241 24 3,100 5,826 25,073
poverty 5,224 18 13 0 7 25 100
hispanic 5,224 36 28 0 13 54 100
white 5,224 72 20 0 63 87 100
black 5,224 12 17 0 1 15 100

If you want to build a dataset from multiple states at once, you will need to use a loop because the Census API limits downloads at the tract level to one state at a time. It would look something like this:

### GRAB DATA FOR ALL STATES AND STACK

# NOTE(review): this list holds the 50 states only; "DC" is not included.
# Confirm that is intended.
state.list <- c("AL","AK","AR","AZ","CA","CO","CT","DE","FL","GA","HI","ID","IL",
                "IN","IA","KS","KY","LA","MD","MA","ME","MI","MN","MS","MO","MT",
                "NE","NV","NH","NJ","NM","NY","NC","ND","OH","OK","OR","PA","RI",
                "SC","SD","TN","TX","UT","VT","VA","WA","WI","WV","WY")  


# hispanic:  "B01001I_001"
# median age:  "B01002A_001"

# County-level requests for one state. The results are not stored; they are
# only printed when run at top level.
acs.fetch( endyear=2010, geo=geo.make( state="TX", county="*" ), variable="B17001_001" )
acs.fetch( endyear=2010, geo=geo.make( state="TX", county="*" ), variable="B01003_001" )


# Download one ACS (endyear 2010) tract-level variable for a state and return
# it as a data frame keyed by tract GEOID.
#
# Args:
#   state.abb: state passed to geo.make(), e.g. "TX".
#   variable:  ACS variable code, e.g. "B01003_001".
#   col.name:  name given to the estimate column in the result.
#   keep.name: if TRUE, also return the row names of the estimate matrix
#              as a `tract.name` column.
#
# Returns:
#   A data frame with a character `tract.id` column, a numeric column named
#   `col.name`, and optionally a character `tract.name` column.
fetch.tract.variable <- function( state.abb, variable, col.name, keep.name=FALSE )
{
  my.acs <- acs.fetch( endyear=2010,
                       geo=geo.make( state=state.abb, county="*", tract="*" ),
                       variable=variable )

  geo <- my.acs@geography

  # GEOID = 2-digit state + 3-digit county + 6-digit tract, zero-padded.
  tract.id <- paste0( sprintf( "%02d", as.integer( geo$state ) ),
                      sprintf( "%03d", as.integer( geo$county ) ),
                      sprintf( "%06d", as.integer( geo$tract ) ) )

  out <- data.frame( tract.id=tract.id,
                     value=as.numeric( my.acs@estimate[ , variable ] ),
                     stringsAsFactors=FALSE )
  names( out )[ 2 ] <- col.name

  if( keep.name )
  {
    out$tract.name <- rownames( my.acs@estimate )
  }

  out
}


# Fetch one variable for every state in state.list and stack the per-state
# tables. The pieces are collected in a list and bound once, instead of
# growing a table with rbind() on every iteration.
#
# Args:
#   variable: ACS variable code.
#   col.name: name given to the estimate column in the result.
#
# Returns:
#   A data frame with columns `tract.id` and `col.name`, one row per tract.
fetch.all.states <- function( variable, col.name )
{
  per.state <- lapply( state.list, fetch.tract.variable,
                       variable=variable, col.name=col.name )
  do.call( rbind, per.state )
}


# poverty rate (percent of the baseline population below the poverty line)
#
# "B17001_001" is the baseline population for which poverty is measured and
# "B17001_002" is the count below the poverty line, so the rate needs both.
# Unlike the race counts below, it cannot be derived later from totalpop,
# so it is computed here.
pov.base <- fetch.all.states( "B17001_001", "any.income" )
pov.below <- fetch.all.states( "B17001_002", "inc.below" )

dat.pov <- merge( pov.base, pov.below, by="tract.id" )
dat.pov$poverty <- dat.pov$inc.below / dat.pov$any.income
# 0/0 yields NaN; tracts with an empty baseline population are coded as 0.
dat.pov$poverty[ dat.pov$any.income == 0 ] <- 0
dat.pov$poverty <- round( 100*dat.pov$poverty, 1 )
dat.pov <- dat.pov[ c("tract.id","poverty") ]


# median household income
dat.inc <- fetch.all.states( "B19013_001", "income" )


# race.white (count; divide by totalpop to get a percentage)
dat.race.white <- fetch.all.states( "B02001_002", "white" )


# race.black (count; divide by totalpop to get a percentage)
dat.race.black <- fetch.all.states( "B02001_003", "black" )


# total population
dat.pop <- fetch.all.states( "B01003_001", "totalpop" )



# Combine everything into one row per tract, matching on the GEOID.
dat <- merge( dat.pov, dat.inc, by="tract.id" )

dat <- merge( dat, dat.race.white, by="tract.id" )

dat <- merge( dat, dat.race.black, by="tract.id" )

dat <- merge( dat, dat.pop, by="tract.id" )


names( dat )