library(tidyr)
library(dplyr)

# Build a county-by-county adjacency matrix (`g`) for one state from a
# tab-separated county adjacency listing.
#
# The file has no header, so read.table() names the columns V1..V4. As used
# below: V1 = county name, V2 = county code, V3 = neighbour name,
# V4 = neighbour code. V1/V2 are only present on the first row of each
# county's group of neighbour rows.
# NOTE(review): the codes look like Census FIPS codes read as integers (so a
# leading zero is dropped and 6000-6999 would be state 06) -- confirm.

# Settings ----
adjacency_url <- "http://www.stat.ucla.edu/~nchristo/statistics_c173_c273/county_adjacency.txt"
state_suffix <- ", CA"  # literal text a county name must contain to be kept
code_lower <- 5999      # exclusive lower bound on the neighbour code (V4)
code_upper <- 7000      # exclusive upper bound on the neighbour code (V4)

# Cross-tabulate (origin, neighbour) name pairs into a square data frame whose
# rows and columns share one sorted label set, with every diagonal cell set
# to 1.
#
# origin, neighbour: character vectors of equal length, one pair per element.
# Returns a data frame with identical row and column names.
adjacency_table <- function(origin, neighbour) {
  # A shared level set keeps row i and column i on the same label even when a
  # label appears on only one side of the pairs.
  labels <- sort(union(origin, neighbour))
  counts <- table(
    factor(origin, levels = labels),
    factor(neighbour, levels = labels)
  )
  out <- as.data.frame.matrix(counts)
  for (i in seq_along(labels)) {
    out[i, i] <- 1
  }
  out
}

# Load ----
a <- read.table(
  adjacency_url,
  sep = "\t",
  fill = FALSE,
  strip.white = TRUE,
  stringsAsFactors = FALSE
)

# Reshape ----
# fill() only replaces NA, so this carries the last seen county code down onto
# the neighbour rows where V2 is missing.
b <- a %>% fill(V2)

# Lookup of county name by code, taken from the rows that carry a code.
# The name `c` is kept from the original script; calls to c() still resolve
# to the base function because R skips non-function bindings when calling.
c <- a %>%
  filter(!is.na(V2)) %>%
  select(COUNTY = V1, V2)

# Attach the owning county's name to every row, then drop the rows where a
# county is listed as its own neighbour.
d <- left_join(b, c, by = "V2") %>%
  filter(COUNTY != V3) %>%
  select(COUNTY, V2, V3, V4)

# Restrict to one state ----
# Keep counties whose name contains the state suffix (matched literally).
e <- d[grepl(state_suffix, d$COUNTY, fixed = TRUE), ]

# Keep neighbours whose code lies inside the open interval; which() drops any
# row with a missing code instead of turning it into an all-NA row.
e2 <- e[which(e$V4 > code_lower & e$V4 < code_upper), ]

# Adjacency matrix ----
# Self-pairs were removed above, so the helper sets the diagonal to 1 itself.
f <- adjacency_table(e2$COUNTY, e2$V3)

g <- as.matrix(f)
dim(g)