# Test median calibration against two other fast calibration methods
# Louis Merlin
# Last Updated: December 10 2019
# Goal of last update: remove unnecessary code to streamline for sharing in publication
#
# The script simulates 50 random grid cities and, for each city, loops over 30
# "true" impedance coefficients.  For every city/impedance pair it:
#   Step 1: generates a random city (population, employment, travel times)
#   Step 2: sets an impedance value
#   Step 3: generates OD flows from a doubly-constrained gravity model
#   Step 4: estimates the impedance coefficient three ways
#           (median calibration, Osth closed form, trip-length-distribution fit)
#   Step 5: compares the estimates to the true impedance and records them
#   Step 6: records how accurate a sampled median travel time would be

library(data.table)

# Results for 50 cities x impedance loop (only loop_ind 1:30 is filled below)
results <- as.data.table(expand.grid(city_ind = 1:50, loop_ind = 1:100,
                                     imp_true_pl = 0,
                                     imp_est_med_pl = 0, imp_est_osth_pl = 0, imp_est_TLD_pl = 0,
                                     imp_err_med_pl = 0, imp_err_osth_pl = 0, imp_err_TLD_pl = 0,
                                     median_pl = 0))

# Accuracy of the sample median: share of 1000 draws exactly right (results_med1)
# and within one minute (results_med2), for sample sizes 50 through 300
results_med1 <- as.data.table(expand.grid(city_ind = 1:50, loop_ind = 1:30,
                                          med_50 = 0, med_100 = 0, med_150 = 0,
                                          med_200 = 0, med_250 = 0, med_300 = 0))
results_med2 <- as.data.table(expand.grid(city_ind = 1:50, loop_ind = 1:30,
                                          med_50 = 0, med_100 = 0, med_150 = 0,
                                          med_200 = 0, med_250 = 0, med_300 = 0))

# Column glossary:
#   imp_est_med  = impedance estimate from median calibration method
#   imp_est_osth = impedance estimate from Osth method
#   imp_est_TLD  = impedance estimate from trip length distribution method
#   imp_err_*    = corresponding relative errors (stored as absolute values)

setkey(results, city_ind, loop_ind)

#####
# Step 0: Functions
#####

# gravity_ibf: one update step of the inverse-balancing-factor calculation for
# a doubly-constrained negative-exponential gravity model.
#
#   type     - "origin": sum impedance-weighted attractions over destinations
#              "destin": sum impedance-weighted productions over origins
#   skim     - travel-time table with origin_x/origin_y/destin_x/destin_y/mins
#   taz_list - data.table of zone coordinates (columns x, y), one row per zone
#   dest_DT  - data.table with x, y, dest (size term) and zonal_wgt (current
#              balancing factor of the opposite side)
#   imp      - impedance coefficient of the negative exponential
#
# Returns a data.table with columns TAZ.x, TAZ.y and new_zonal_wgt; the
# reciprocal of new_zonal_wgt becomes the zone's new A (or B) factor.
gravity_ibf <- function(type=c("origin","destin"), skim, taz_list, dest_DT, imp){
  n <- nrow(taz_list)
  # data.table(TAZ = taz_list, ...) spreads taz_list into columns TAZ.x, TAZ.y
  output <- data.table(TAZ = taz_list, new_zonal_wgt = 0)
  setkey(dest_DT,x,y)
  # If measuring travel times from a particular origin
  if (type=="origin") {
    for (i in 1:n) {
      origin <- taz_list[i]
      origin_i <- skim[origin_x==origin[,x]&origin_y==origin[,y],]
      setkey(origin_i,destin_x,destin_y)
      # Keyed join: sum over destinations of exp(-t*imp) * size * weight
      zone_sums <- dest_DT[origin_i,sum(exp(-mins*imp)*dest*zonal_wgt),nomatch=0]
      output[i,new_zonal_wgt:=zone_sums]
    }
  }
  # If measuring travel times to a particular destination
  if (type=="destin") {
    for (i in 1:n) {
      destin <- taz_list[i]
      destin_i <- skim[destin_x==destin[,x]&destin_y==destin[,y],]
      setkey(destin_i,origin_x,origin_y)
      zone_sums <- dest_DT[destin_i,sum(exp(-mins*imp)*dest*zonal_wgt),nomatch=0]
      output[i,new_zonal_wgt:=zone_sums]
    }
  }
  return(output)
}

#####
# End Step 0: Functions
#####

#####
# Step 1: Generate a random city
#####

# Set constants for all cities
city_total <- 400000 # Total workers and employment for the entire city
zones_NS <- 20       # Number of zones the city has from north to south
zones_EW <- 20       # Number of zones the city has from east to west
zone_dist <- 5       # Minutes of travel time between adjacent zones

city <- as.data.table(expand.grid(x = 1:zones_EW, y = 1:zones_NS, pop = NA, emp = NA))

# FIX: zone coordinate list passed as taz_list to gravity_ibf() below.  The
# original script referenced city_zones without ever defining it, which made
# the inverse-balancing step fail with "object 'city_zones' not found".
city_zones <- city[, .(x, y)]

# Create tables of origins and of destinations
origins <- expand.grid(origin_x = 1:zones_EW, origin_y = 1:zones_NS)
head(origins)
nrow(origins)
norigins <- nrow(origins)   # Number of origins
destins <- expand.grid(destin_x = 1:zones_EW, destin_y = 1:zones_NS)
ndestins <- nrow(destins)   # Number of destinations

city_no <- 1 # index for rows of results

#####
# BEGIN LOOP OVER CITIES HERE
#####
while (city_no<51){

  # Worker population per zone: truncated normal, rescaled to city_total
  city$pop <- rnorm(nrow(city), mean = 1000, sd = 300)
  city$pop[city$pop<100] <- 100
  city$pop <- round((city$pop * city_total/sum(city$pop)), digits=0)

  # Employment per zone: exponential, normalized to the same total as population
  city$emp <- rexp(nrow(city),1/1000)
  city$emp <- round((city$emp* (sum(city$pop)/sum(city$emp))), digits=0)

  # Travel time matrix: Manhattan distance in zones times zone_dist, plus
  # integer noise in [-2, 2]
  tt_matrix <- as.data.table(expand.grid(origin_x = 1:zones_EW, origin_y = 1:zones_NS,
                                         destin_x = 1:zones_EW, destin_y = 1:zones_NS))
  tt_matrix$mins <- zone_dist *                       # Average distance between zones
    (abs(tt_matrix$origin_x-tt_matrix$destin_x) +     # Distance along x coordinate
     abs(tt_matrix$origin_y-tt_matrix$destin_y))      # Distance along y coordinate
  tt_matrix[, mins := mins + sample(c(2,1,0,-1,-2), size=(zones_NS*zones_EW)^2, replace=TRUE)]

  # Intrazonal travel time = half the mean time to the nearest zones.
  # NOTE(review): the original comment said "three closest zones" and the
  # variable is named closest3, but [1:8] takes the eight smallest times --
  # confirm which was intended; kept as 8 to preserve published behavior.
  for (i in 1:norigins){
    closest3 <- sort(tt_matrix[origin_x==origins[i,1] & origin_y==origins[i,2], mins])[1:8]
    tt_matrix[origin_x==origins[i,1] & destin_x==origins[i,1] &
              origin_y==origins[i,2] & destin_y==origins[i,2],
              mins := round(mean(closest3)/2)]
  }

  # Maximum commute in minutes
  mins_max <- max(tt_matrix$mins)

  # Reset inverse balancing factors for each new city (start with A = B = 1)
  city[,A:=1]
  city[,B:=1]
  head(city)

  print("End Step 1: New City Generated")
  flush.console()

  #####
  # END Step 1 / Step 2: Generate an impedance value
  #####

  #####
  # BEGIN LOOP OVER IMPEDANCES HERE
  #####
  for (imp_loop in 1:30){

    imp_true <- imp_loop * 0.01   # true impedance: 0.01, 0.02, ..., 0.30
    print("End Step 2: New Impedance Generated")
    flush.console()

    #####
    # Step 3: Generate OD flows from doubly-constrained negative exponential gravity model
    #####

    # Iteratively update the zonal balancing factors A (origins) and B
    # (destinations) until the summed relative change falls below 0.01,
    # capped at 150 iterations.
    i <- 1
    cnvg_A <- 1
    cnvg_B <- 1
    error_current <- 9999
    print("Inverse Balancing Convergence Step")
    while(cnvg_A + cnvg_B > 0.01 & i < 150){
      i <- i + 1
      # Progress report every 20 iterations; stop early if the error stalls
      if (i/20==floor(i/20)){
        print(i)
        print("Current Total Error")
        error_last <- error_current
        error_current <- cnvg_A + cnvg_B
        print(error_current)
        # If no improvement in error then end loop
        if (abs(error_current-error_last)<0.00001) {i<-150}
        flush.console()
      }

      # Update A weights from employment and current B weights
      dest_zones <- city[,.(x,y,emp,B)]
      setnames(dest_zones,"emp","dest")
      setnames(dest_zones,"B","zonal_wgt")
      A_zone <- gravity_ibf(type="origin", skim=tt_matrix, taz_list=city_zones,
                            dest_DT=dest_zones, imp = imp_true)
      setkey(A_zone, TAZ.x,TAZ.y)
      setkey(city,x,y)
      diff_A <- abs(city$A-1/A_zone$new_zonal_wgt)/city$A
      cnvg_A <- sum(diff_A)
      city[A_zone,A:=1/new_zonal_wgt]

      # Update B weights from population and current A weights
      dest_zones <- city[,.(x,y,pop,A)]
      setnames(dest_zones,"pop","dest")
      setnames(dest_zones,"A","zonal_wgt")
      B_zone <- gravity_ibf(type="destin", skim=tt_matrix, taz_list=city_zones,
                            dest_DT=dest_zones, imp = imp_true)

      # Keep the two weight vectors at comparable magnitude
      if (max(A_zone$new_zonal_wgt)>2*max(B_zone$new_zonal_wgt)){B_zone[,new_zonal_wgt:=new_zonal_wgt*2]}
      if (max(B_zone$new_zonal_wgt)>2*max(A_zone$new_zonal_wgt)){A_zone[,new_zonal_wgt:=new_zonal_wgt*2]}
      setkey(B_zone, TAZ.x,TAZ.y)
      setkey(city,x,y)
      diff_B <- abs(city$B-1/B_zone$new_zonal_wgt)/city$B
      cnvg_B <- sum(diff_B)
      city[B_zone,B:=1/new_zonal_wgt]
    } # END updating A & B weights

    # OD flows from the doubly constrained formula T_ij = A_i B_j P_i E_j exp(-t_ij * imp)
    od_flows <- tt_matrix
    head(od_flows)
    # Join origin/population data to od_flows
    setkey(od_flows,origin_x,origin_y)
    setkey(city,x,y)
    tempcity <- city[,.(x,y,pop,A)]
    od_flows <- od_flows[tempcity,]
    # Join destination/employment data to od_flows
    setkey(od_flows,destin_x,destin_y)
    setkey(city,x,y)
    tempcity <- city[,.(x,y,emp,B)]
    od_flows <- od_flows[tempcity,]
    # Calculate flows with doubly constrained formula
    od_flows[,flow := A*B*pop*emp*exp(-mins*imp_true)]

    print("Final Inverse Balancing Coefficient Error")
    print((cnvg_A + cnvg_B))
    print("End Step 3: OD Flows Calculated")
    flush.console()

    #####
    # Step 4: Generate alternative predictions for impedance coefficient
    #####

    # Median travel time: smallest minute at which cumulative flow reaches
    # half of all commuters.
    # NOTE(review): the name `median` shadows stats::median; calls like
    # median(sample(...)) below still resolve to the function, but renaming
    # the variable would be safer.
    setkey(od_flows,mins)
    od_flows[, cum_flow := cumsum(flow)]
    head(od_flows[cum_flow>=city_total/2,])
    median <- min(od_flows[cum_flow>=city_total/2,mins])

    # Osth closed-form estimate from the median travel time
    imp_est_osth <- -log(0.5)/median

    # Trip-length-distribution (TLD) estimate: regress log(flow) on minutes
    od_flows[,mins:=round(mins,0)]  # round all travel times to nearest minute
    setkey(od_flows,mins)
    head(od_flows)
    TLD <- od_flows[,round(sum(flow),2),by=mins]
    setnames(TLD,"V1","flow_total")
    TLD[,sum(flow_total)]
    plot(TLD[1:50,])
    summary(TLD$flow_total)
    summary(TLD$flow_total[TLD$flow_total>0])
    # Drop very short trips and near-zero bins before fitting
    TLD_clean <- TLD[mins>=3 & flow_total >= 1,]
    TLD_model <- lm(log(flow_total) ~ mins, data=TLD_clean)
    summary(TLD_model)
    coef(TLD_model)
    imp_est_TLD <- as.numeric(-coef(TLD_model)[2])

    # Median-calibration estimate: find the impedance that balances the
    # weighted opportunities reachable before vs after the median.
    # origin_mins[i, j] holds, for origin i, opportunities in minute band
    # (j-1, j] weighted by travelers at the origin.
    # NOTE(review): the weight used is emp*pop of the OD pair, not the modeled
    # flow -- confirm this matches the published method.
    origin_mins <- array(data=0, dim = c(norigins, mins_max))
    for (i in 1:norigins){
      for (j in 1:mins_max){
        origin_x_temp <- origins[i,1]
        origin_y_temp <- origins[i,2]
        origin_mins[i,j] <- sum(od_flows[mins<=j & mins>j-1 &
                                         origin_x==origin_x_temp & origin_y==origin_y_temp,
                                         emp*pop])
      }
    }
    incremental_jobs_passed <- colSums(origin_mins)/sum(city$pop)

    # Bisection-style search: grow/shrink imp_est_med, halving the step each
    # time the search direction flips, until the step is below 1e-5.
    imp_est_med <- 0
    step <- 0.1
    toosmall <- 0.5   # 0.5 = undetermined; 1 = last move grew; 0 = last move shrank
    i <- 1
    while (step>0.00001){
      if (sum(incremental_jobs_passed[1:median]*exp(-imp_est_med*(1:median))) <
          sum(incremental_jobs_passed[(median+1):mins_max]*exp(-imp_est_med*((median+1):mins_max)))){
        # Impedance too small
        if (toosmall==0){ step <- step/2 }   # direction flipped: halve the step
        imp_est_med <- imp_est_med + step
        toosmall <- 1
      } else {
        # Impedance too large (in absolute value terms)
        if (toosmall==1){ step <- step/2 }
        imp_est_med <- imp_est_med - step
        toosmall <- 0
      }
      i <- i + 1   # iteration counter (diagnostic only)
    }

    # The two totals below should be (approximately) equal at convergence
    print("Check on balanced attractiveness before and after median")
    print(sum(incremental_jobs_passed[1:median]*exp(-imp_est_med*(1:median))))
    print(sum(incremental_jobs_passed[(median+1):mins_max]*exp(-imp_est_med*((median+1):mins_max))))
    print("End Step 4: Three Impedance Estimates Calculated")
    flush.console()

    #####
    # Step 5: Compare predicted impedances to actual impedance
    #####

    # Relative errors of each estimator
    imp_err_med <- (imp_true - imp_est_med)/imp_true
    imp_err_osth <- (imp_true - imp_est_osth)/imp_true
    imp_err_TLD <- (imp_true - imp_est_TLD)/imp_true

    print("City Number")
    print(city_no)
    print("Impedance Loop Number")
    print(imp_loop)
    print("True Impedance Coefficient")
    print(imp_true)
    print("Median Estimated Impedance Coefficient")
    print(imp_est_med)
    print("Trip Length Distribution Impedance Coefficient")
    print(imp_est_TLD)
    print("osth Estimated Impedance Coefficient")
    print(imp_est_osth)
    print("Median Travel Time")
    print(median)

    # Record this city/impedance combination
    results[city_ind==city_no & loop_ind==imp_loop, imp_true_pl:=imp_true]
    results[city_ind==city_no & loop_ind==imp_loop, imp_est_med_pl:=imp_est_med]
    results[city_ind==city_no & loop_ind==imp_loop, imp_err_med_pl:=abs(imp_err_med)]
    results[city_ind==city_no & loop_ind==imp_loop, imp_est_osth_pl:=imp_est_osth]
    results[city_ind==city_no & loop_ind==imp_loop, imp_err_osth_pl:=abs(imp_err_osth)]
    results[city_ind==city_no & loop_ind==imp_loop, imp_est_TLD_pl:=imp_est_TLD]
    results[city_ind==city_no & loop_ind==imp_loop, imp_err_TLD_pl:=abs(imp_err_TLD)]
    results[city_ind==city_no & loop_ind==imp_loop, median_pl:=median]
    print("End Step 5: Results Recorded to Results")
    cat("\n")

    #####
    # Step 6: Examine accuracy of the sample median
    #####

    total_flow <- od_flows[,sum(flow)]
    # Pull out only flow volumes and minutes from origin-destination flows
    flows <- od_flows[,c("flow","mins")]
    # Probability of drawing each OD pair, proportional to its share of flow
    flows[,p:=flow/total_flow]

    # med_ests1[k, s] = 1 when draw k at sample size s hits the median exactly;
    # med_ests2[k, s] = 1 when it lands within one minute.
    med_ests1 <- array(data = 0, dim = c(1000,6))
    med_ests2 <- array(data = 0, dim = c(1000,6))
    # NOTE(review): sample() uses the default replace=FALSE, so each OD pair
    # can be drawn at most once per sample -- confirm that sampling with
    # replacement was not intended when simulating traveler surveys.
    for (k in 1:1000){
      med_50 <- median(sample(flows[,mins],size=50,prob=flows[,p]))
      med_100 <- median(sample(flows[,mins],size=100,prob=flows[,p]))
      med_150 <- median(sample(flows[,mins],size=150,prob=flows[,p]))
      med_200 <- median(sample(flows[,mins],size=200,prob=flows[,p]))
      med_250 <- median(sample(flows[,mins],size=250,prob=flows[,p]))
      med_300 <- median(sample(flows[,mins],size=300,prob=flows[,p]))
      # Indicate when median is exactly correct
      if (med_50==median){med_ests1[k,1] <- 1}
      if (med_100==median){med_ests1[k,2] <- 1}
      if (med_150==median){med_ests1[k,3] <- 1}
      if (med_200==median){med_ests1[k,4] <- 1}
      if (med_250==median){med_ests1[k,5] <- 1}
      if (med_300==median){med_ests1[k,6] <- 1}
      # Indicate when the median is within 1 minute of true
      if (abs(med_50-median)<=1){med_ests2[k,1] <- 1}
      if (abs(med_100-median)<=1){med_ests2[k,2] <- 1}
      if (abs(med_150-median)<=1){med_ests2[k,3] <- 1}
      if (abs(med_200-median)<=1){med_ests2[k,4] <- 1}
      if (abs(med_250-median)<=1){med_ests2[k,5] <- 1}
      if (abs(med_300-median)<=1){med_ests2[k,6] <- 1}
    }
    head(med_ests1)
    head(med_ests2)

    # Record percentage exactly right to results_med1
    results_med1[city_ind==city_no & loop_ind==imp_loop, med_50:=sum(med_ests1[,1])/1000]
    results_med1[city_ind==city_no & loop_ind==imp_loop, med_100:=sum(med_ests1[,2])/1000]
    results_med1[city_ind==city_no & loop_ind==imp_loop, med_150:=sum(med_ests1[,3])/1000]
    results_med1[city_ind==city_no & loop_ind==imp_loop, med_200:=sum(med_ests1[,4])/1000]
    results_med1[city_ind==city_no & loop_ind==imp_loop, med_250:=sum(med_ests1[,5])/1000]
    results_med1[city_ind==city_no & loop_ind==imp_loop, med_300:=sum(med_ests1[,6])/1000]
    # Record percentage within 1 minute to results_med2
    results_med2[city_ind==city_no & loop_ind==imp_loop, med_50:=sum(med_ests2[,1])/1000]
    results_med2[city_ind==city_no & loop_ind==imp_loop, med_100:=sum(med_ests2[,2])/1000]
    results_med2[city_ind==city_no & loop_ind==imp_loop, med_150:=sum(med_ests2[,3])/1000]
    results_med2[city_ind==city_no & loop_ind==imp_loop, med_200:=sum(med_ests2[,4])/1000]
    results_med2[city_ind==city_no & loop_ind==imp_loop, med_250:=sum(med_ests2[,5])/1000]
    results_med2[city_ind==city_no & loop_ind==imp_loop, med_300:=sum(med_ests2[,6])/1000]
    print("End Step 6: Sample Median Estimates Recorded")
    flush.console()
  } # End impedance parameter loop
  #####
  # END LOOP OVER IMPEDANCES HERE
  #####

  city_no <- city_no + 1
  # NOTE(review): hard-coded absolute path -- consider a relative path for portability
  save.image("C:/Users/lmerlin/Documents/MedianCompare.RData")
} # End city generation loop
#####
# END LOOP OVER CITIES HERE
#####

# Output city data to a file to examine (last generated city)
write.csv(city,file="city_example.csv")

#####
# Report and export results to a table
#####

library(data.table)

# Means (and one SD) of the estimates/errors across the 50 simulated cities,
# grouped by the true impedance value
results2 <- results[city_ind<=50,mean(imp_est_med_pl),by=imp_true_pl]   # mean median estimate
temp1 <- results[city_ind<=50,mean(imp_est_osth_pl),by=imp_true_pl]     # mean Osth estimate
temp2 <- results[city_ind<=50,mean(imp_est_TLD_pl),by=imp_true_pl]      # mean TLD estimate
temp3 <- results[city_ind<=50,mean(imp_err_med_pl),by=imp_true_pl]      # mean median error
temp4 <- results[city_ind<=50,sd(imp_err_med_pl),by=imp_true_pl]        # SD of median error
temp5 <- results[city_ind<=50,mean(imp_err_osth_pl),by=imp_true_pl]     # mean Osth error
temp6 <- results[city_ind<=50,mean(imp_err_TLD_pl),by=imp_true_pl]      # mean TLD error
temp7 <- results[city_ind<=50,mean(median_pl),by=imp_true_pl]           # mean median travel time

setkey(results2, imp_true_pl)
setnames(results2, "V1", "med_mean_est")
setkey(temp1, imp_true_pl)
setnames(temp1, "V1", "osth_mean_est")
setkey(temp2, imp_true_pl)
setnames(temp2, "V1", "TLD_mean_est")
setkey(temp3, imp_true_pl)
setnames(temp3, "V1", "med_mean_err")
setkey(temp4, imp_true_pl)
setnames(temp4, "V1", "med_sd_err")
setkey(temp5, imp_true_pl)
setnames(temp5, "V1", "osth_mean_err")
setkey(temp6, imp_true_pl)
setnames(temp6, "V1", "TLD_mean_err")
setkey(temp7, imp_true_pl)
setnames(temp7, "V1", "median_tt")

# Assemble the summary table with keyed joins
results2 <- results2[temp1]
results2 <- results2[temp2]
results2 <- results2[temp3]
results2 <- results2[temp4]
# Finish assembling the estimator-comparison table: temp5-temp7 (Osth error,
# TLD error, mean median travel time) were keyed on imp_true_pl above, so a
# sequence of keyed joins bolts them onto results2.
for (piece in list(temp5, temp6, temp7)) {
  results2 <- results2[piece]
}
results2
write.csv(x = results2, file = "Exponential_Compare_111419.csv")

# Average accuracy of the sample median by impedance value: mean share of the
# 1000 draws exactly right (results_med1) and within one minute (results_med2),
# across the 50 cities, for sample sizes 50 through 250.
mean_by_impedance <- function(dt, col_name) {
  # Mean of one accuracy column over the first 50 cities, grouped by loop_ind.
  # The summary column is named V1 to match data.table's default naming.
  summed <- dt[city_ind <= 50, .(V1 = mean(.SD[[1]])), by = loop_ind, .SDcols = col_name]
  setkey(summed, loop_ind)
  summed
}

size_cols <- c("med_50", "med_100", "med_150", "med_200", "med_250")
accuracy_tables <- c(
  lapply(size_cols, function(col_name) mean_by_impedance(results_med1, col_name)),
  lapply(size_cols, function(col_name) mean_by_impedance(results_med2, col_name))
)

# Join the ten summaries one after another, in the same order as before:
# results_med1 med_50..med_250, then results_med2 med_50..med_250
results3 <- Reduce(function(acc, nxt) acc[nxt], accuracy_tables[-1], accuracy_tables[[1]])
results3
write.csv(x = results3, file = "Median_Estimates_111419.csv")

#####
# ARCHIVE: Code no longer used
#####