Visualising and Analysing Geographic and Movement Data
The task of this take-home exercise is to reveal the following using appropriate visualisation techniques:
Social areas of the city of Engagement, Ohio USA.
Visualising and analysing locations with traffic bottlenecks in the city of Engagement, Ohio USA.
Before we get started, it is important to ensure that the required R packages have been installed. If they have, we will load them. If they have yet to be installed, we will install them and then load them into the R environment.
The code chunk below is used to install and load the required packages onto RStudio.
# Install any package that is not yet available, then attach every package
# listed in `packages`.
packages <- c("tidyverse", "sf", "tmap", "data.table", "knitr", "sp")
for (p in packages) {
  # requireNamespace() only checks that the package can be loaded; unlike
  # require() it does not attach the package as a side effect of the check.
  if (!requireNamespace(p, quietly = TRUE)) {
    install.packages(p)
  }
  # library() raises an error if the package still cannot be loaded, so a
  # failed installation is not silently ignored.
  library(p, character.only = TRUE)
}
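The code chunk below imports all the required files from the data
folder into R. The travel journal is read by using read_csv()
of readr and saved as a tibble data frame called travel, while the
WKT files are read by using read_sf() of sf and saved as simple feature data frames.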
# Import the raw inputs.
# The travel journal is read with read_csv() into `travel`.
travel <- read_csv(file = "data/TravelJournal.csv")

# The files under data/wkt are read with read_sf(). The GEOM_POSSIBLE_NAMES
# open option names `location` as the column holding the geometry.
schools <- read_sf(
  dsn = "data/wkt//Schools.csv",
  options = "GEOM_POSSIBLE_NAMES=location"
)
apartments <- read_sf(
  dsn = "data/wkt/Apartments.csv",
  options = "GEOM_POSSIBLE_NAMES=location"
)
buildings <- read_sf(
  dsn = "data/wkt/Buildings.csv",
  options = "GEOM_POSSIBLE_NAMES=location"
)
pubs <- read_sf(
  dsn = "data/wkt/Pubs.csv",
  options = "GEOM_POSSIBLE_NAMES=location"
)
restaurants <- read_sf(
  dsn = "data/wkt/Restaurants.csv",
  options = "GEOM_POSSIBLE_NAMES=location"
)
employers <- read_sf(
  dsn = "data/wkt/Employers.csv",
  options = "GEOM_POSSIBLE_NAMES=location"
)
# Convert the apartment attributes to integers, then replace the rental cost
# with a six-level factor of cost bands.
apartments <- apartments %>%
  mutate(
    across(c(rentalCost, maxOccupancy, numberOfRooms), as.integer),
    # rentalCost is already an integer here because the across() above runs
    # first; include.lowest = TRUE keeps a value equal to the first break.
    rentalCost = cut(
      rentalCost,
      breaks = c(347, 531, 713, 876, 1040, 1250, 1601),
      labels = c(
        "348-531", "532-713", "714-876",
        "877-1040", "1041-1250", "1251-1601"
      ),
      include.lowest = TRUE
    )
  )
Let’s visualise the restaurants and pubs by their maximum occupancy.
# Bin the building occupancy into five bands (as a factor) so the polygons
# can be coloured by band.
buildings <- buildings %>%
  mutate(
    maxOccupancy = cut(
      as.integer(maxOccupancy),
      breaks = c(1, 25, 50, 75, 100, 420),
      labels = c("2-25", "26-50", "51-75", "76-100", "101-420"),
      include.lowest = TRUE
    )
  )

# Restaurant and pub occupancy stays numeric because it drives symbol size.
restaurants <- restaurants %>%
  mutate(maxOccupancy = as.integer(maxOccupancy))
pubs <- pubs %>%
  mutate(maxOccupancy = as.integer(maxOccupancy))

# Interactive map: buildings as the base layer, pubs in yellow and
# restaurants in blue, both sized by maximum occupancy.
tmap_mode("view")
tm_shape(buildings) +
  tm_polygons(
    col = "maxOccupancy",
    size = 1,
    border.col = "black",
    border.lwd = 1,
    legend.show = FALSE
  ) +
  tm_shape(pubs) +
  tm_markers(
    col = "yellow",
    shape = marker_icon(),
    size = "maxOccupancy",
    border.col = "black",
    border.lwd = 1
  ) +
  tm_shape(restaurants) +
  tm_bubbles(
    col = "blue",
    shape = marker_icon(),
    size = "maxOccupancy",
    border.col = "black",
    border.lwd = 2
  )
When we zoom into the map, it shows that restaurants 447, 898 and 896 have the highest occupancy, and that pubs 892, 444 and 893 have higher occupancy than the other pubs.
Let us look at the apartments in the city with respect to the number of rooms.
# The room count is converted to numeric because it is used as the bubble size.
apartments <- apartments %>%
  mutate(numberOfRooms = as.numeric(numberOfRooms))

# One facet per room count; sync = TRUE is passed so the facets are
# synchronised with each other.
tm_shape(apartments) +
  tm_bubbles(
    col = "yellow",
    size = "numberOfRooms",
    border.lwd = 0.5
  ) +
  tm_facets(
    by = "numberOfRooms",
    sync = TRUE
  )
The above maps show that the distribution of apartments across the city is almost the same regardless of the number of rooms.
Now, let’s see how much time participants spend in each pub or restaurant in the city.
# Derive per-trip measures from the travel journal and keep only the columns
# used by the later aggregations.
travel_tidy <- travel %>%
  mutate(
    # Both durations are difftime values expressed in minutes.
    travel_duration = difftime(
      travelEndTime, travelStartTime,
      units = "mins"
    ),
    time_spent = difftime(
      checkOutTime, checkInTime,
      units = "mins"
    ),
    # Difference between the balance before and after the trip.
    amount_spent = startingBalance - endingBalance
  ) %>%
  dplyr::select(
    participantId,
    travelStartLocationId,
    travelEndLocationId,
    travel_duration,
    time_spent,
    amount_spent,
    purpose
  )

# Preview the first rows as a formatted table.
travel_tidy %>%
  head() %>%
  kable()
# Trips recorded as returning from a restaurant; the restaurant is matched on
# the trip's start location.
restaurant_filt <- travel_tidy %>%
  filter(purpose == "Coming Back From Restaurant")

# Attach the restaurant attributes to every matching trip (left join).
restaurant_merge <- merge(
  x = restaurant_filt,
  y = restaurants,
  by.x = "travelStartLocationId",
  by.y = "restaurantId",
  all.x = TRUE
)

# Total time_spent per restaurant id.
rest_grp <- restaurant_merge %>%
  group_by(travelStartLocationId) %>%
  dplyr::summarise(tot_time = sum(time_spent))

# Re-attach the restaurant attributes to the per-restaurant totals.
rest_grp_merge <- merge(
  x = rest_grp,
  y = restaurants,
  by.x = "travelStartLocationId",
  by.y = "restaurantId",
  all.x = TRUE
)

# Optional export of the aggregated table (disabled):
# rest_grp_merge
# fwrite(rest_grp_merge,
# file= "data/rest_grp_merge.csv",
# sep = ",")
# Trips made for social recreation; the pub is matched on the trip's end
# location.
pubs_filt <- travel_tidy %>%
  filter(purpose == "Recreation (Social Gathering)")

# Attach the pub attributes to every matching trip (left join).
pub_merge <- merge(
  x = pubs_filt,
  y = pubs,
  by.x = "travelEndLocationId",
  by.y = "pubId",
  all.x = TRUE
)

# Total time_spent per pub id.
pub_grp <- pub_merge %>%
  group_by(travelEndLocationId) %>%
  dplyr::summarise(tot_time = sum(time_spent))

# Re-attach the pub attributes to the per-pub totals.
pub_grp_merge <- merge(
  x = pub_grp,
  y = pubs,
  by.x = "travelEndLocationId",
  by.y = "pubId",
  all.x = TRUE
)

# Optional export of the aggregated table (disabled):
# pub_grp_merge
# fwrite(pub_grp_merge,
# file= "data/pub_grp_merge.csv",
# sep = ",")
# Interactive map of total time spent: restaurants in blue, pubs in yellow,
# with marker size driven by `tot_time`.
# NOTE(review): `rest_data_sf` and `pub_data_sf` are not created in the code
# shown in this document; presumably they are sf versions of the aggregated
# restaurant and pub tables - confirm where they are defined.
tmap_mode("view")
tm_shape(rest_data_sf) +
  tm_markers(
    shape = marker_icon(),
    col = "blue",
    size = "tot_time",
    border.col = "black",
    border.lwd = 0.5
  ) +
  tm_shape(pub_data_sf) +
  tm_markers(
    shape = marker_icon(),
    col = "yellow",
    size = "tot_time",
    border.col = "black",
    border.lwd = 1
  )
It is observed from the map that participants spend the most time in pubs 1342 and 1799 and in restaurants 896 and 445.
Let’s look at the movement pattern of the participants in the city.
# Load the participant movement logs from the serialised RDS file.
logs_selected <- read_rds(file = "data/logs_selected.rds")

# Re-read the buildings so the layer is back to its raw state; the earlier
# occupancy map overwrote `maxOccupancy` with binned categories.
buildings <- read_sf(
  dsn = "data/wkt/Buildings.csv",
  options = "GEOM_POSSIBLE_NAMES=location"
)
Let's filter for the peak hours and visualise the areas where traffic is heaviest.
# Static map of peak-hour movement: grey building footprints as the base
# layer, with one line per path coloured by participant.
# NOTE(review): `logs_path_peak` is not created in the code shown in this
# document; presumably it is derived from `logs_selected` - confirm where it
# is defined.
tmap_mode("plot")
tm_shape(buildings) +
  tm_polygons(
    col = "grey60",
    size = 1,
    border.col = "black",
    border.lwd = 1
  ) +
  tm_shape(logs_path_peak) +
  tm_lines(col = "participantId") +
  tm_layout(
    main.title = "Traffic in peak hours",
    main.title.position = "center",
    main.title.size = 1,
    # The legend is hidden for this map.
    legend.show = FALSE
  )