::p_load(tmap, sf, DT, stplanr,
pacman
performance, ggpubr, tidyverse)
Hands_on_Ex3
Overview
In this exercise we will explore the movements
Loading packages & data
Load packages
Load data
<- read_csv("data/aspatial/origin_destination_bus_202310.csv") odbus
Rows: 5694297 Columns: 7
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (5): YEAR_MONTH, DAY_TYPE, PT_TYPE, ORIGIN_PT_CODE, DESTINATION_PT_CODE
dbl (2): TIME_PER_HOUR, TOTAL_TRIPS
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
$ORIGIN_PT_CODE <- as.factor(odbus$ORIGIN_PT_CODE)
odbus$DESTINATION_PT_CODE <- as.factor(odbus$DESTINATION_PT_CODE) odbus
Next we will specifically utilise the data for weekdays and only from 6am to 9am. We will also be retaining the destination bus stop this time so that we can see the flow of riders.
<- odbus %>%
odbus6_9 filter(DAY_TYPE == "WEEKDAY") %>%
filter(TIME_PER_HOUR >= 6 &
<= 9) %>%
TIME_PER_HOUR group_by(ORIGIN_PT_CODE,
%>%
DESTINATION_PT_CODE) summarise(TRIPS = sum(TOTAL_TRIPS))
`summarise()` has grouped output by 'ORIGIN_PT_CODE'. You can override using
the `.groups` argument.
We can also showcase the data as a datatable.
datatable(odbus6_9)
Warning in instance$preRenderHook(instance): It seems your data is too big for
client-side DataTables. You may consider server-side processing:
https://rstudio.github.io/DT/server.html
rds format
We can save the output file as rds format using the write_rds() function.
write_rds(odbus6_9, "data/rds/odbus6_9.rds")
When we need the data, we can use the read_rds() function.
<- read_rds("data/rds/odbus6_9.rds") odbus6_9
Load Geospatial data
We will utilise the busstop location data from Q4 of 2022 and the layout of Singapore using the URA Master Plan from 2019.
<- st_read(dsn = "data/geospatial",
busstop layer = "BusStop") %>%
st_transform(crs = 3414)
Reading layer `BusStop' from data source
`C:\Users\Lian Khye\Desktop\MITB\Geospatial\geartooth\ISSS624\Hands-on_Ex03\data\geospatial'
using driver `ESRI Shapefile'
Simple feature collection with 5161 features and 3 fields
Geometry type: POINT
Dimension: XY
Bounding box: xmin: 3970.122 ymin: 26482.1 xmax: 48284.56 ymax: 52983.82
Projected CRS: SVY21
<- st_read(dsn = "data/geospatial",
mpsz layer = "MPSZ-2019") %>%
st_transform(crs = 3414)
Reading layer `MPSZ-2019' from data source
`C:\Users\Lian Khye\Desktop\MITB\Geospatial\geartooth\ISSS624\Hands-on_Ex03\data\geospatial'
using driver `ESRI Shapefile'
Simple feature collection with 332 features and 6 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 103.6057 ymin: 1.158699 xmax: 104.0885 ymax: 1.470775
Geodetic CRS: WGS 84
<- write_rds(mpsz, "data/rds/mpsz.rds") mpsz
Geospatial data processing
Combine busstop and subzone layout
Firstly we will need to combine the bus stop data and the subzone data together. We will use the st_intersection() function that will allocate the bus stops to the subzones of the masterplan. We will then use select() function to obtain only the bus stop numbers and the subzone regions to keep the data more compact.
<- st_intersection(busstop, mpsz) %>%
busstop_mpsz select(BUS_STOP_N, SUBZONE_C) %>%
st_drop_geometry()
Warning: attribute variables are assumed to be spatially constant throughout
all geometries
We can also visualist this using datatable() function.
datatable(busstop_mpsz)
Once again we will save this into rds format using the write_rds() function.
write_rds(busstop_mpsz, "data/rds/busstop_mpsz.rds")
Combine busstop-subzone data and ridership info
Next we will then combine the busstop and subzone information together with the ridership information for weekdays, 6am to 9am. This will allow us to allocate the ridership to each subzone. We will use the bus stop number to join the data using left_join() function.
<- left_join(odbus6_9 , busstop_mpsz,
od_data by = c("ORIGIN_PT_CODE" = "BUS_STOP_N")) %>%
rename(ORIGIN_BS = ORIGIN_PT_CODE,
ORIGIN_SZ = SUBZONE_C,
DESTIN_BS = DESTINATION_PT_CODE)
Warning in left_join(odbus6_9, busstop_mpsz, by = c(ORIGIN_PT_CODE = "BUS_STOP_N")): Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 25632 of `x` matches multiple rows in `y`.
ℹ Row 673 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship =
"many-to-many"` to silence this warning.
To prevent over adding of data, we will check for duplicates.
<- od_data %>%
duplicate group_by_all() %>%
filter(n()>1) %>%
ungroup()
duplicate
# A tibble: 1,186 × 4
ORIGIN_BS DESTIN_BS TRIPS ORIGIN_SZ
<chr> <fct> <dbl> <chr>
1 11009 01341 1 QTSZ01
2 11009 01341 1 QTSZ01
3 11009 01411 4 QTSZ01
4 11009 01411 4 QTSZ01
5 11009 01421 17 QTSZ01
6 11009 01421 17 QTSZ01
7 11009 01511 19 QTSZ01
8 11009 01511 19 QTSZ01
9 11009 01521 2 QTSZ01
10 11009 01521 2 QTSZ01
# ℹ 1,176 more rows
If there is any output from the duplicate function that we just perform, we can use the unique() function to keep only unique data points.
<- unique(od_data) od_data
We can then finally add the destination subzone information back using left_join() function.
<- left_join(od_data , busstop_mpsz,
od_data by = c("DESTIN_BS" = "BUS_STOP_N"))
Warning in left_join(od_data, busstop_mpsz, by = c(DESTIN_BS = "BUS_STOP_N")): Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 167 of `x` matches multiple rows in `y`.
ℹ Row 672 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship =
"many-to-many"` to silence this warning.
We can also check for duplication once more using the same steps that we did earlier.
<- od_data %>%
duplicate group_by_all() %>%
filter(n()>1) %>%
ungroup()
duplicate
# A tibble: 1,350 × 5
ORIGIN_BS DESTIN_BS TRIPS ORIGIN_SZ SUBZONE_C
<chr> <chr> <dbl> <chr> <chr>
1 01013 51071 2 RCSZ10 CCSZ01
2 01013 51071 2 RCSZ10 CCSZ01
3 01112 51071 66 RCSZ10 CCSZ01
4 01112 51071 66 RCSZ10 CCSZ01
5 01112 53041 4 RCSZ10 BSSZ01
6 01112 53041 4 RCSZ10 BSSZ01
7 01121 51071 8 RCSZ04 CCSZ01
8 01121 51071 8 RCSZ04 CCSZ01
9 01121 82221 1 RCSZ04 GLSZ05
10 01121 82221 1 RCSZ04 GLSZ05
# ℹ 1,340 more rows
<- unique(od_data) od_data
Sum total trips
To count the total number of trips made from a bus stop of origin to the destination, we can use group_by() for sorting. Putting multiple arguments into the function allows for the sub categorising the data. This way we can track the drop off point of the passengers.
<- od_data %>%
od_data rename(DESTIN_SZ = SUBZONE_C) %>%
drop_na() %>%
group_by(ORIGIN_SZ, DESTIN_SZ) %>%
summarise(MORNING_PEAK = sum(TRIPS))
`summarise()` has grouped output by 'ORIGIN_SZ'. You can override using the
`.groups` argument.
We will then save the data into rds format again
write_rds(od_data, "data/rds/od_data.rds")
<- read_rds("data/rds/od_data.rds") od_data
Visualisation
Here we will visualise the flow of passengers across subzones.
Removing intra-subzone travel
We will remove the movement of passengers within subzones using the following code.
<- od_data[od_data$ORIGIN_SZ!=od_data$DESTIN_SZ,] od_data1
Create desire lines
Next we will visualise all of the flow or connections between different bus stops from a subzone to another using the od2line() function from the stplanr package.
<- od2line(flow = od_data1,
flowLine zones = mpsz,
zone_code = "SUBZONE_C")
Creating centroids representing desire line start and end points.
Visualise desire lines
We can then visualise this flow using the following code. Since the passenger flow is high, it is better to limit the visualisation. Here we will use a flow passenger flow from subzone to subzone of 5000 or more.
tm_shape(mpsz) +
tm_polygons() +
%>%
flowLine tm_shape() +
tm_lines(lwd = "MORNING_PEAK",
style = "quantile",
scale = c(0.1, 1, 3, 5, 7, 10),
n = 6,
alpha = 0.3)
Warning in g$scale * (w_legend/maxW): longer object length is not a multiple of
shorter object length
Warning in g$scale * (x/maxW): longer object length is not a multiple of
shorter object length
tm_shape(mpsz) +
tm_polygons() +
%>%
flowLine filter(MORNING_PEAK >= 5000) %>%
tm_shape() +
tm_lines(lwd = "MORNING_PEAK",
style = "quantile",
scale = c(0.1, 1, 3, 5, 7, 10),
n = 6,
alpha = 0.3)
Warning in g$scale * (w_legend/maxW): longer object length is not a multiple of
shorter object length
Warning in g$scale * (x/maxW): longer object length is not a multiple of
shorter object length