From c20df198ba7b25758b7f7ce6d4af3318c9c94c48 Mon Sep 17 00:00:00 2001 From: luoxiaojian Date: Fri, 23 Aug 2024 12:41:22 +0800 Subject: [PATCH] Refactor vertex map, imported pthash. (#168) --- CMakeLists.txt | 4 +- .../analytical_apps/cuda/pagerank/pagerank.h | 3 + examples/analytical_apps/flags.cc | 8 +- examples/analytical_apps/flags.h | 3 +- examples/analytical_apps/lcc/lcc_opt.h | 20 +- examples/analytical_apps/run_app.h | 83 +- examples/analytical_apps/run_app_opt.h | 270 ++++--- examples/analytical_apps/run_cuda_app.h | 126 +-- .../append_only_edgecut_fragment.h | 85 +- examples/gnn_sampler/run_sampler.cc | 1 - grape/app/mutation_context.h | 77 +- grape/communication/sync_comm.h | 64 +- grape/cuda/fragment/device_fragment.h | 5 +- grape/cuda/fragment/host_fragment.h | 62 +- grape/cuda/vertex_map/device_vertex_map.h | 32 +- grape/fragment/basic_efile_fragment_loader.h | 185 +++++ grape/fragment/basic_fragment_loader.h | 411 +++------- grape/fragment/basic_fragment_loader_base.h | 440 ++++++++++ grape/fragment/basic_fragment_mutator.h | 100 +-- grape/fragment/basic_local_fragment_loader.h | 250 ++++++ grape/fragment/basic_rb_fragment_loader.h | 228 ++++++ grape/fragment/csr_edgecut_fragment_base.h | 1 - grape/fragment/edgecut_fragment_base.h | 4 +- grape/fragment/ev_fragment_loader.h | 67 +- grape/fragment/ev_fragment_mutator.h | 1 - grape/fragment/ev_fragment_rebalance_loader.h | 432 ---------- grape/fragment/fragment_base.h | 24 +- grape/fragment/immutable_edgecut_fragment.h | 86 +- grape/fragment/loader.h | 18 +- grape/fragment/mutable_edgecut_fragment.h | 88 +- grape/fragment/rebalancer.h | 170 ++++ grape/graph/id_indexer.h | 437 +++++++--- grape/graph/immutable_csr.h | 7 + grape/graph/mutable_csr.h | 53 +- grape/types.h | 27 + grape/util.h | 34 +- grape/utils/concurrent_queue.h | 14 +- grape/utils/pthash_utils/ef_sequence_view.h | 149 ++++ grape/utils/pthash_utils/encoders_view.h | 62 ++ grape/utils/pthash_utils/ph_indexer_view.h | 81 ++ 
grape/utils/pthash_utils/single_phf_view.h | 218 +++++ grape/utils/ref_vector.h | 85 ++ grape/utils/string_view_vector.h | 127 +++ grape/vertex_map/global_vertex_map.h | 318 -------- grape/vertex_map/idxers/hashmap_idxer.h | 130 +++ grape/vertex_map/idxers/hashmap_idxer_view.h | 157 ++++ grape/vertex_map/idxers/idxer_base.h | 105 +++ grape/vertex_map/idxers/idxers.h | 114 +++ grape/vertex_map/idxers/local_idxer.h | 121 +++ grape/vertex_map/idxers/pthash_idxer.h | 186 +++++ grape/vertex_map/idxers/sorted_array_idxer.h | 198 +++++ grape/vertex_map/local_vertex_map.h | 280 ------- grape/vertex_map/partitioner.h | 292 +++++++ grape/vertex_map/vertex_map.h | 525 ++++++++++++ grape/vertex_map/vertex_map_base.h | 147 ---- misc/app_tests.sh | 49 +- misc/cuda_app_tests.sh | 10 +- misc/load_tests.cc | 178 +++++ misc/mutable_fragment_tests.cc | 271 +++++++ misc/vertex_map_tests.cc | 330 ++++++++ tests/load_tests.cc | 178 +++++ tests/mutable_fragment_tests.cc | 2 +- tests/vertex_map_tests.cc | 401 ++++++---- thirdparty/flat_hash_map/flat_hash_map.hpp | 5 + .../external_memory_builder_single_phf.hpp | 753 ++++++++++++++++++ .../internal_memory_builder_single_phf.hpp | 365 +++++++++ thirdparty/pthash/builders/search.hpp | 358 +++++++++ thirdparty/pthash/builders/util.hpp | 301 +++++++ thirdparty/pthash/encoders/bit_vector.hpp | 347 ++++++++ thirdparty/pthash/encoders/compact_vector.hpp | 306 +++++++ thirdparty/pthash/encoders/darray.hpp | 185 +++++ thirdparty/pthash/encoders/ef_sequence.hpp | 145 ++++ thirdparty/pthash/encoders/encoders.hpp | 161 ++++ thirdparty/pthash/encoders/util.hpp | 114 +++ thirdparty/pthash/essentials/essentials.hpp | 644 +++++++++++++++ thirdparty/pthash/fastmod/fastmod.h | 209 +++++ thirdparty/pthash/mm_file/mm_file.hpp | 176 ++++ thirdparty/pthash/pthash.hpp | 25 + thirdparty/pthash/single_phf.hpp | 159 ++++ thirdparty/pthash/utils/bucketers.hpp | 92 +++ thirdparty/pthash/utils/hasher.hpp | 188 +++++ thirdparty/pthash/utils/logger.hpp | 87 ++ 
thirdparty/pthash/utils/util.hpp | 57 ++ 83 files changed, 10968 insertions(+), 2343 deletions(-) create mode 100644 grape/fragment/basic_efile_fragment_loader.h create mode 100644 grape/fragment/basic_fragment_loader_base.h create mode 100644 grape/fragment/basic_local_fragment_loader.h create mode 100644 grape/fragment/basic_rb_fragment_loader.h delete mode 100644 grape/fragment/ev_fragment_rebalance_loader.h create mode 100644 grape/fragment/rebalancer.h create mode 100644 grape/utils/pthash_utils/ef_sequence_view.h create mode 100644 grape/utils/pthash_utils/encoders_view.h create mode 100644 grape/utils/pthash_utils/ph_indexer_view.h create mode 100644 grape/utils/pthash_utils/single_phf_view.h create mode 100644 grape/utils/ref_vector.h delete mode 100644 grape/vertex_map/global_vertex_map.h create mode 100644 grape/vertex_map/idxers/hashmap_idxer.h create mode 100644 grape/vertex_map/idxers/hashmap_idxer_view.h create mode 100644 grape/vertex_map/idxers/idxer_base.h create mode 100644 grape/vertex_map/idxers/idxers.h create mode 100644 grape/vertex_map/idxers/local_idxer.h create mode 100644 grape/vertex_map/idxers/pthash_idxer.h create mode 100644 grape/vertex_map/idxers/sorted_array_idxer.h delete mode 100644 grape/vertex_map/local_vertex_map.h create mode 100644 grape/vertex_map/partitioner.h create mode 100644 grape/vertex_map/vertex_map.h delete mode 100644 grape/vertex_map/vertex_map_base.h create mode 100644 misc/load_tests.cc create mode 100644 misc/mutable_fragment_tests.cc create mode 100644 misc/vertex_map_tests.cc create mode 100644 tests/load_tests.cc create mode 100644 thirdparty/pthash/builders/external_memory_builder_single_phf.hpp create mode 100644 thirdparty/pthash/builders/internal_memory_builder_single_phf.hpp create mode 100644 thirdparty/pthash/builders/search.hpp create mode 100644 thirdparty/pthash/builders/util.hpp create mode 100644 thirdparty/pthash/encoders/bit_vector.hpp create mode 100644 
thirdparty/pthash/encoders/compact_vector.hpp create mode 100644 thirdparty/pthash/encoders/darray.hpp create mode 100644 thirdparty/pthash/encoders/ef_sequence.hpp create mode 100644 thirdparty/pthash/encoders/encoders.hpp create mode 100644 thirdparty/pthash/encoders/util.hpp create mode 100644 thirdparty/pthash/essentials/essentials.hpp create mode 100644 thirdparty/pthash/fastmod/fastmod.h create mode 100644 thirdparty/pthash/mm_file/mm_file.hpp create mode 100644 thirdparty/pthash/pthash.hpp create mode 100644 thirdparty/pthash/single_phf.hpp create mode 100644 thirdparty/pthash/utils/bucketers.hpp create mode 100644 thirdparty/pthash/utils/hasher.hpp create mode 100644 thirdparty/pthash/utils/logger.hpp create mode 100644 thirdparty/pthash/utils/util.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index e2f58893..c39a06a2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -79,10 +79,10 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall") if (APPLE) set(CMAKE_MACOSX_RPATH ON) else () - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp -Werror -Wl,-rpath,$ORIGIN") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp -Werror -Wl,-rpath,$ORIGIN -march=native") endif () if (USE_SIMD) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx2 -march=native") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx2") endif () set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -g -fprofile-arcs -ftest-coverage") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -g") diff --git a/examples/analytical_apps/cuda/pagerank/pagerank.h b/examples/analytical_apps/cuda/pagerank/pagerank.h index 86a623f4..a1e7ffa2 100644 --- a/examples/analytical_apps/cuda/pagerank/pagerank.h +++ b/examples/analytical_apps/cuda/pagerank/pagerank.h @@ -18,6 +18,9 @@ limitations under the License. 
#ifdef __CUDACC__ #include "cuda/app_config.h" #include "grape/grape.h" +#include +#include +#include namespace grape { namespace cuda { diff --git a/examples/analytical_apps/flags.cc b/examples/analytical_apps/flags.cc index 17e24c48..08acc594 100644 --- a/examples/analytical_apps/flags.cc +++ b/examples/analytical_apps/flags.cc @@ -43,9 +43,13 @@ DEFINE_int32(kcore_k, 0, "k value of kcore."); DEFINE_int32(kclique_k, 0, "k value of kclique."); DEFINE_bool(opt, false, "whether to use optimization."); +DEFINE_string(partitioner_type, "map", + "partitioner type, these options can be used: " + "hash, map, segment"); +DEFINE_string(idxer_type, "hashmap", + "idxer type, these options can be used: " + "sorted_array, hashmap, pthash, local"); -DEFINE_bool(segmented_partition, true, - "whether to use segmented partitioning."); DEFINE_bool(rebalance, false, "whether to rebalance graph after loading."); DEFINE_int32(rebalance_vertex_factor, 0, "vertex factor of rebalancing."); diff --git a/examples/analytical_apps/flags.h b/examples/analytical_apps/flags.h index 95b0f3e2..03c51dd7 100644 --- a/examples/analytical_apps/flags.h +++ b/examples/analytical_apps/flags.h @@ -40,8 +40,9 @@ DECLARE_int32(kclique_k); DECLARE_int32(degree_threshold); DECLARE_bool(opt); +DECLARE_string(partitioner_type); +DECLARE_string(idxer_type); -DECLARE_bool(segmented_partition); DECLARE_bool(rebalance); DECLARE_int32(rebalance_vertex_factor); diff --git a/examples/analytical_apps/lcc/lcc_opt.h b/examples/analytical_apps/lcc/lcc_opt.h index 20c94d57..ba815a49 100644 --- a/examples/analytical_apps/lcc/lcc_opt.h +++ b/examples/analytical_apps/lcc/lcc_opt.h @@ -351,14 +351,15 @@ class LCCOpt(frag, v, - ctx.global_degree[v], tid); + channels[tid].SendMsgThroughOEdges(frag, v, + ctx.global_degree[v]); }); // Just in case we are running on single process and no messages will @@ -504,6 +505,7 @@ class LCCOpt::hash(frag.GetInnerVertexGid(v)); auto& pool = ctx.memory_pools[tid]; auto& nbr_vec = 
ctx.complete_neighbor[v]; @@ -543,8 +545,8 @@ class LCCOpt(frag, v, msg_vec, - tid); + channels[tid].SendMsgThroughOEdges(frag, v, + msg_vec); }); messages.ForceContinue(); } else if (ctx.stage == 1) { @@ -586,10 +588,10 @@ class LCCOpt( - frag, v, ctx.tricnt[v], tid); + channels[tid].SyncStateOnOuterVertex( + frag, v, ctx.tricnt[v]); } }); messages.ForceContinue(); diff --git a/examples/analytical_apps/run_app.h b/examples/analytical_apps/run_app.h index f396331a..aa586138 100644 --- a/examples/analytical_apps/run_app.h +++ b/examples/analytical_apps/run_app.h @@ -33,7 +33,6 @@ limitations under the License. #include #include #include -#include #ifdef GRANULA #include "thirdparty/atlarge-research-granula/granula.hpp" @@ -73,10 +72,7 @@ void Init() { if (FLAGS_deserialize && FLAGS_serialization_prefix.empty()) { LOG(FATAL) << "Please assign a serialization prefix."; } else if (FLAGS_efile.empty()) { - LOG(FATAL) << "Please assign input edge files."; - } else if (FLAGS_vfile.empty() && FLAGS_segmented_partition) { - LOG(FATAL) << "EFragmentLoader dosen't support Segmented Partitioner. 
" - "Please assign vertex files or use Hash Partitioner"; + LOG(FATAL) << "Please assign input edge file."; } if (!FLAGS_out_prefix.empty() && access(FLAGS_out_prefix.c_str(), 0) != 0) { @@ -173,28 +169,19 @@ void CreateAndQuery(const CommSpec& comm_spec, const std::string& out_prefix, } else if (FLAGS_serialize) { graph_spec.set_serialize(true, FLAGS_serialization_prefix); } - if (FLAGS_segmented_partition) { - using VertexMapType = - GlobalVertexMap>; - using FRAG_T = ImmutableEdgecutFragment; - std::shared_ptr fragment = - LoadGraph(FLAGS_efile, FLAGS_vfile, comm_spec, graph_spec); - using AppType = APP_T; - auto app = std::make_shared(); - DoQuery(fragment, app, comm_spec, spec, - out_prefix, args...); - } else { - graph_spec.set_rebalance(false, 0); - using FRAG_T = - ImmutableEdgecutFragment; - std::shared_ptr fragment = - LoadGraph(FLAGS_efile, FLAGS_vfile, comm_spec, graph_spec); - using AppType = APP_T; - auto app = std::make_shared(); - DoQuery(fragment, app, comm_spec, spec, - out_prefix, args...); - } + + graph_spec.partitioner_type = + grape::parse_partitioner_type_name(FLAGS_partitioner_type); + graph_spec.idxer_type = grape::parse_idxer_type_name(FLAGS_idxer_type); + + using FRAG_T = + ImmutableEdgecutFragment; + std::shared_ptr fragment = + LoadGraph(FLAGS_efile, FLAGS_vfile, comm_spec, graph_spec); + using AppType = APP_T; + auto app = std::make_shared(); + DoQuery(fragment, app, comm_spec, spec, out_prefix, + args...); } template >; - using FRAG_T = ImmutableEdgecutFragment; - std::shared_ptr fragment = - LoadGraph(FLAGS_efile, FLAGS_vfile, comm_spec, graph_spec); - using App1Type = APP1_T; - auto app1 = std::make_shared(); - using App2Type = APP2_T; - auto app2 = std::make_shared(); - DoDualQuery( - fragment, app1, app2, comm_spec, spec, out_prefix, args...); - } else { - graph_spec.set_rebalance(false, 0); - using FRAG_T = - ImmutableEdgecutFragment; - std::shared_ptr fragment = - LoadGraph(FLAGS_efile, FLAGS_vfile, comm_spec, graph_spec); - 
using App1Type = APP1_T; - auto app1 = std::make_shared(); - using App2Type = APP2_T; - auto app2 = std::make_shared(); - DoDualQuery( - fragment, app1, app2, comm_spec, spec, out_prefix, args...); - } + + graph_spec.partitioner_type = + grape::parse_partitioner_type_name(FLAGS_partitioner_type); + graph_spec.idxer_type = grape::parse_idxer_type_name(FLAGS_idxer_type); + + using FRAG_T = + ImmutableEdgecutFragment; + std::shared_ptr fragment = + LoadGraph(FLAGS_efile, FLAGS_vfile, comm_spec, graph_spec); + + using App1Type = APP1_T; + auto app1 = std::make_shared(); + using App2Type = APP2_T; + auto app2 = std::make_shared(); + DoDualQuery( + fragment, app1, app2, comm_spec, spec, out_prefix, args...); } template diff --git a/examples/analytical_apps/run_app_opt.h b/examples/analytical_apps/run_app_opt.h index e5c93c34..24ce04cd 100644 --- a/examples/analytical_apps/run_app_opt.h +++ b/examples/analytical_apps/run_app_opt.h @@ -66,63 +66,20 @@ void RunUndirectedPageRankOpt(const CommSpec& comm_spec, graph_spec.set_rebalance(FLAGS_rebalance, FLAGS_rebalance_vertex_factor); if (FLAGS_deserialize) { graph_spec.set_deserialize(true, FLAGS_serialization_prefix); - } else if (FLAGS_serialize) { + } + if (FLAGS_serialize) { graph_spec.set_serialize(true, FLAGS_serialization_prefix); } - if (FLAGS_segmented_partition) { - using VertexMapType = - GlobalVertexMap>; - using FRAG_T = - ImmutableEdgecutFragment; - std::shared_ptr fragment = - LoadGraph(FLAGS_efile, FLAGS_vfile, comm_spec, graph_spec); - bool push; - if (fragment->fnum() >= 8) { - uint64_t local_ivnum = fragment->GetInnerVerticesNum(); - uint64_t local_ovnum = fragment->GetOuterVerticesNum(); - uint64_t total_ivnum, total_ovnum; - MPI_Allreduce(&local_ivnum, &total_ivnum, 1, MPI_UINT64_T, MPI_SUM, - comm_spec.comm()); - MPI_Allreduce(&local_ovnum, &total_ovnum, 1, MPI_UINT64_T, MPI_SUM, - comm_spec.comm()); - - double avg_degree = static_cast(FLAGS_edge_num) / - static_cast(FLAGS_vertex_num); - double rate = - 
static_cast(total_ovnum) / static_cast(total_ivnum); - - if (rate < 0.5) { - // not to many outer vertices - push = true; - } else if (avg_degree > 60) { - // dense - push = true; - } else { - push = false; - } - } else { - push = true; - } - - if (!push) { - using AppType = PageRankOpt; - auto app = std::make_shared(); - DoQuery(fragment, app, comm_spec, spec, - out_prefix, delta, mr); - } else { - using AppType = PageRankPushOpt; - auto app = std::make_shared(); - DoQuery(fragment, app, comm_spec, spec, - out_prefix, delta, mr); - } - } else { - graph_spec.set_rebalance(false, 0); - using FRAG_T = ImmutableEdgecutFragment; - std::shared_ptr fragment = - LoadGraph(FLAGS_efile, FLAGS_vfile, comm_spec, graph_spec); + graph_spec.partitioner_type = + parse_partitioner_type_name(FLAGS_partitioner_type); + graph_spec.idxer_type = parse_idxer_type_name(FLAGS_idxer_type); + using FRAG_T = ImmutableEdgecutFragment; + std::shared_ptr fragment = + LoadGraph(FLAGS_efile, FLAGS_vfile, comm_spec, graph_spec); + bool push; + if (fragment->fnum() >= 8) { uint64_t local_ivnum = fragment->GetInnerVerticesNum(); uint64_t local_ovnum = fragment->GetOuterVerticesNum(); uint64_t total_ivnum, total_ovnum; @@ -131,18 +88,34 @@ void RunUndirectedPageRankOpt(const CommSpec& comm_spec, MPI_Allreduce(&local_ovnum, &total_ovnum, 1, MPI_UINT64_T, MPI_SUM, comm_spec.comm()); - if (static_cast(total_ovnum) > - static_cast(total_ivnum) * 3.2) { - using AppType = PageRank; - auto app = std::make_shared(); - DoQuery(fragment, app, comm_spec, spec, - out_prefix, delta, mr); + double avg_degree = static_cast(FLAGS_edge_num) / + static_cast(FLAGS_vertex_num); + double rate = + static_cast(total_ovnum) / static_cast(total_ivnum); + + if (rate < 0.5) { + // not to many outer vertices + push = true; + } else if (avg_degree > 60) { + // dense + push = true; } else { - using AppType = PageRankPushOpt; - auto app = std::make_shared(); - DoQuery(fragment, app, comm_spec, spec, - out_prefix, delta, mr); + 
push = false; } + } else { + push = true; + } + + if (!push) { + using AppType = PageRankOpt; + auto app = std::make_shared(); + DoQuery(fragment, app, comm_spec, spec, + out_prefix, delta, mr); + } else { + using AppType = PageRankPushOpt; + auto app = std::make_shared(); + DoQuery(fragment, app, comm_spec, spec, + out_prefix, delta, mr); } } @@ -200,9 +173,13 @@ void RunDirectedCDLP(const CommSpec& comm_spec, const std::string& out_prefix, graph_spec.set_rebalance(FLAGS_rebalance, FLAGS_rebalance_vertex_factor); if (FLAGS_deserialize) { graph_spec.set_deserialize(true, FLAGS_serialization_prefix); - } else if (FLAGS_serialize) { + } + if (FLAGS_serialize) { graph_spec.set_serialize(true, FLAGS_serialization_prefix); } + graph_spec.partitioner_type = + parse_partitioner_type_name(FLAGS_partitioner_type); + graph_spec.idxer_type = parse_idxer_type_name(FLAGS_idxer_type); using FRAG_T = ImmutableEdgecutFragment; @@ -211,7 +188,7 @@ void RunDirectedCDLP(const CommSpec& comm_spec, const std::string& out_prefix, LoadGraph(FLAGS_efile, FLAGS_vfile, comm_spec, graph_spec); std::pair min_max_id = - get_min_max_id(*fragment->GetVertexMap()); + get_min_max_id(fragment->GetVertexMap()); if (is_int32(min_max_id.first) && is_int32(min_max_id.second)) { using AppType = CDLPOpt; auto app = std::make_shared(); @@ -233,15 +210,16 @@ void RunUndirectedCDLP(const CommSpec& comm_spec, const std::string& out_prefix, graph_spec.set_rebalance(FLAGS_rebalance, FLAGS_rebalance_vertex_factor); if (FLAGS_deserialize) { graph_spec.set_deserialize(true, FLAGS_serialization_prefix); - } else if (FLAGS_serialize) { + } + if (FLAGS_serialize) { graph_spec.set_serialize(true, FLAGS_serialization_prefix); } + graph_spec.partitioner_type = + parse_partitioner_type_name(FLAGS_partitioner_type); + graph_spec.idxer_type = parse_idxer_type_name(FLAGS_idxer_type); - using VertexMapType = - GlobalVertexMap>; - using FRAG_T = - ImmutableEdgecutFragment; + using FRAG_T = ImmutableEdgecutFragment; 
std::shared_ptr fragment = LoadGraph(FLAGS_efile, FLAGS_vfile, comm_spec, graph_spec); @@ -249,7 +227,7 @@ void RunUndirectedCDLP(const CommSpec& comm_spec, const std::string& out_prefix, double avg_degree = static_cast(FLAGS_edge_num) / static_cast(FLAGS_vertex_num); std::pair min_max_id = - get_min_max_id(*fragment->GetVertexMap()); + get_min_max_id(fragment->GetVertexMap()); if (is_int32(min_max_id.first) && is_int32(min_max_id.second)) { if (avg_degree > 256) { using AppType = CDLPOptUDDense; @@ -287,32 +265,22 @@ void CreateAndQueryOpt(const CommSpec& comm_spec, const std::string& out_prefix, graph_spec.set_rebalance(FLAGS_rebalance, FLAGS_rebalance_vertex_factor); if (FLAGS_deserialize) { graph_spec.set_deserialize(true, FLAGS_serialization_prefix); - } else if (FLAGS_serialize) { - graph_spec.set_serialize(true, FLAGS_serialization_prefix); } - if (FLAGS_segmented_partition) { - using VertexMapType = - GlobalVertexMap>; - using FRAG_T = - ImmutableEdgecutFragment; - std::shared_ptr fragment = - LoadGraph(FLAGS_efile, FLAGS_vfile, comm_spec, graph_spec); - using AppType = APP_T; - auto app = std::make_shared(); - DoQuery(fragment, app, comm_spec, spec, - out_prefix, args...); - } else { - graph_spec.set_rebalance(false, 0); - using FRAG_T = ImmutableEdgecutFragment; - std::shared_ptr fragment = - LoadGraph(FLAGS_efile, FLAGS_vfile, comm_spec, graph_spec); - using AppType = APP_T; - auto app = std::make_shared(); - DoQuery(fragment, app, comm_spec, spec, - out_prefix, args...); + if (FLAGS_serialize) { + graph_spec.set_serialize(true, FLAGS_serialization_prefix); } + graph_spec.partitioner_type = + parse_partitioner_type_name(FLAGS_partitioner_type); + graph_spec.idxer_type = parse_idxer_type_name(FLAGS_idxer_type); + + using FRAG_T = ImmutableEdgecutFragment; + std::shared_ptr fragment = + LoadGraph(FLAGS_efile, FLAGS_vfile, comm_spec, graph_spec); + using AppType = APP_T; + auto app = std::make_shared(); + DoQuery(fragment, app, comm_spec, spec, out_prefix, 
+ args...); } template >; - using FRAG_T = - ImmutableEdgecutFragment; - std::shared_ptr fragment = - LoadGraph(FLAGS_efile, FLAGS_vfile, comm_spec, graph_spec); - using App1Type = APP1_T; - auto app1 = std::make_shared(); - using App2Type = APP2_T; - auto app2 = std::make_shared(); - DoDualQuery( - fragment, app1, app2, comm_spec, spec, out_prefix, args...); - } else { - graph_spec.set_rebalance(false, 0); - using FRAG_T = ImmutableEdgecutFragment; - std::shared_ptr fragment = - LoadGraph(FLAGS_efile, FLAGS_vfile, comm_spec, graph_spec); - using App1Type = APP1_T; - auto app1 = std::make_shared(); - using App2Type = APP2_T; - auto app2 = std::make_shared(); - DoDualQuery( - fragment, app1, app2, comm_spec, spec, out_prefix, args...); - } + graph_spec.partitioner_type = + parse_partitioner_type_name(FLAGS_partitioner_type); + graph_spec.idxer_type = parse_idxer_type_name(FLAGS_idxer_type); + + using FRAG_T = ImmutableEdgecutFragment; + std::shared_ptr fragment = + LoadGraph(FLAGS_efile, FLAGS_vfile, comm_spec, graph_spec); + using App1Type = APP1_T; + auto app1 = std::make_shared(); + using App2Type = APP2_T; + auto app2 = std::make_shared(); + DoDualQuery( + fragment, app1, app2, comm_spec, spec, out_prefix, args...); } void RunOpt() { @@ -384,55 +339,93 @@ void RunOpt() { } std::string name = FLAGS_application; if (name == "sssp") { - FLAGS_segmented_partition = true; FLAGS_rebalance = false; + if (FLAGS_partitioner_type == "default") { + FLAGS_partitioner_type = "segment"; + } + if (FLAGS_idxer_type == "default") { + FLAGS_idxer_type = "sorted_array"; + } CreateAndQueryOpt( comm_spec, out_prefix, spec, FLAGS_sssp_source); } else if (name == "bfs") { + FLAGS_rebalance = false; + if (FLAGS_partitioner_type == "default") { + FLAGS_partitioner_type = "segment"; + } + if (FLAGS_idxer_type == "default") { + FLAGS_idxer_type = "sorted_array"; + } if (FLAGS_directed) { - FLAGS_segmented_partition = true; - FLAGS_rebalance = false; CreateAndQueryOpt( comm_spec, 
out_prefix, spec, FLAGS_bfs_source); } else { - FLAGS_segmented_partition = true; - FLAGS_rebalance = false; CreateAndQueryOpt( comm_spec, out_prefix, spec, FLAGS_bfs_source); } } else if (name == "pagerank") { if (FLAGS_directed) { - FLAGS_segmented_partition = false; + if (FLAGS_partitioner_type == "default") { + FLAGS_partitioner_type = "hash"; + } + if (FLAGS_idxer_type == "default") { + FLAGS_idxer_type = "pthash"; + } CreateAndQueryOpt(comm_spec, out_prefix, spec, FLAGS_pr_d, FLAGS_pr_mr); } else { - FLAGS_segmented_partition = true; FLAGS_rebalance = true; FLAGS_rebalance_vertex_factor = 0; + if (FLAGS_partitioner_type == "default") { + FLAGS_partitioner_type = "segment"; + } + if (FLAGS_idxer_type == "default") { + FLAGS_idxer_type = "sorted_array"; + } RunUndirectedPageRankOpt( comm_spec, out_prefix, spec, FLAGS_pr_d, FLAGS_pr_mr); } } else if (name == "cdlp") { if (FLAGS_directed) { FLAGS_directed = false; - FLAGS_segmented_partition = false; + if (FLAGS_partitioner_type == "default") { + FLAGS_partitioner_type = "hash"; + } + if (FLAGS_idxer_type == "default") { + FLAGS_idxer_type = "pthash"; + } RunDirectedCDLP(comm_spec, out_prefix, spec); } else { - FLAGS_segmented_partition = true; FLAGS_rebalance = true; FLAGS_rebalance_vertex_factor = 0; + if (FLAGS_partitioner_type == "default") { + FLAGS_partitioner_type = "segment"; + } + if (FLAGS_idxer_type == "default") { + FLAGS_idxer_type = "sorted_array"; + } RunUndirectedCDLP(comm_spec, out_prefix, spec); } } else if (name == "wcc") { FLAGS_directed = false; - FLAGS_segmented_partition = true; FLAGS_rebalance = false; + if (FLAGS_partitioner_type == "default") { + FLAGS_partitioner_type = "segment"; + } + if (FLAGS_idxer_type == "default") { + FLAGS_idxer_type = "sorted_array"; + } CreateAndQueryOpt( comm_spec, out_prefix, spec); } else if (name == "lcc") { if (FLAGS_directed) { - FLAGS_segmented_partition = false; + if (FLAGS_partitioner_type == "default") { + FLAGS_partitioner_type = "hash"; + } + if 
(FLAGS_idxer_type == "default") { + FLAGS_idxer_type = "pthash"; + } if (FLAGS_edge_num > static_cast(std::numeric_limits::max())) { CreateAndQueryOpt( @@ -442,9 +435,14 @@ void RunOpt() { comm_spec, out_prefix, spec); } } else { - FLAGS_segmented_partition = true; FLAGS_rebalance = true; FLAGS_rebalance_vertex_factor = 0; + if (FLAGS_partitioner_type == "default") { + FLAGS_partitioner_type = "segment"; + } + if (FLAGS_idxer_type == "default") { + FLAGS_idxer_type = "sorted_array"; + } if (FLAGS_edge_num > static_cast(std::numeric_limits::max()) * 2) { CreateAndQueryOpt( diff --git a/examples/analytical_apps/run_cuda_app.h b/examples/analytical_apps/run_cuda_app.h index 284da2f8..4705aa2a 100644 --- a/examples/analytical_apps/run_cuda_app.h +++ b/examples/analytical_apps/run_cuda_app.h @@ -155,51 +155,28 @@ void CreateAndQueryWithPreprocess(const grape::CommSpec& comm_spec, } else if (FLAGS_serialize) { graph_spec.set_serialize(true, FLAGS_serialization_prefix); } - if (FLAGS_segmented_partition) { - using VERTEX_MAP_T = - GlobalVertexMap>; - using FRAG_T = grape::cuda::HostFragment; - std::shared_ptr fragment; - int dev_id = comm_spec.local_id(); - int dev_count; - - CHECK_CUDA(cudaGetDeviceCount(&dev_count)); - CHECK_LE(comm_spec.local_num(), dev_count) - << "Only found " << dev_count << " GPUs, but " << comm_spec.local_num() - << " processes are launched"; - CHECK_CUDA(cudaSetDevice(dev_id)); - fragment = LoadGraph(efile, vfile, comm_spec, graph_spec); - - auto app = std::make_shared>(); - auto pre = std::make_shared>(); - DoPreprocess, Args...>(fragment, pre, comm_spec, - dev_id, out_prefix, args...); - DoQuery, Args...>(fragment, app, comm_spec, dev_id, - out_prefix, args...); - } else { - graph_spec.set_rebalance(false, 0); - using VERTEX_MAP_T = GlobalVertexMap>; - using FRAG_T = grape::cuda::HostFragment; - std::shared_ptr fragment; - int dev_id = comm_spec.local_id(); - int dev_count; - - CHECK_CUDA(cudaGetDeviceCount(&dev_count)); - 
CHECK_LE(comm_spec.local_num(), dev_count) - << "Only found " << dev_count << " GPUs, but " << comm_spec.local_num() - << " processes are launched"; - CHECK_CUDA(cudaSetDevice(dev_id)); - fragment = LoadGraph(efile, vfile, comm_spec, graph_spec); - - auto app = std::make_shared>(); - auto pre = std::make_shared>(); - DoPreprocess, Args...>(fragment, pre, comm_spec, - dev_id, out_prefix, args...); - DoQuery, Args...>(fragment, app, comm_spec, dev_id, - out_prefix, args...); - } + graph_spec.partitioner_type = + grape::parse_partitioner_type_name(FLAGS_partitioner_type); + graph_spec.idxer_type = grape::parse_idxer_type_name(FLAGS_idxer_type); + using FRAG_T = grape::cuda::HostFragment; + std::shared_ptr fragment; + int dev_id = comm_spec.local_id(); + int dev_count; + + CHECK_CUDA(cudaGetDeviceCount(&dev_count)); + CHECK_LE(comm_spec.local_num(), dev_count) + << "Only found " << dev_count << " GPUs, but " << comm_spec.local_num() + << " processes are launched"; + CHECK_CUDA(cudaSetDevice(dev_id)); + fragment = LoadGraph(efile, vfile, comm_spec, graph_spec); + + auto app = std::make_shared>(); + auto pre = std::make_shared>(); + DoPreprocess, Args...>(fragment, pre, comm_spec, + dev_id, out_prefix, args...); + DoQuery, Args...>(fragment, app, comm_spec, dev_id, + out_prefix, args...); } template >; - using FRAG_T = grape::cuda::HostFragment; - std::shared_ptr fragment; - int dev_id = comm_spec.local_id(); - int dev_count; - - CHECK_CUDA(cudaGetDeviceCount(&dev_count)); - CHECK_LE(comm_spec.local_num(), dev_count) - << "Only found " << dev_count << " GPUs, but " << comm_spec.local_num() - << " processes are launched"; - CHECK_CUDA(cudaSetDevice(dev_id)); - fragment = LoadGraph(efile, vfile, comm_spec, graph_spec); - - auto app = std::make_shared>(); - DoQuery, Args...>(fragment, app, comm_spec, dev_id, - out_prefix, args...); - } else { - graph_spec.set_rebalance(false, 0); - using VERTEX_MAP_T = GlobalVertexMap>; - using FRAG_T = grape::cuda::HostFragment; - 
std::shared_ptr fragment; - int dev_id = comm_spec.local_id(); - int dev_count; - - CHECK_CUDA(cudaGetDeviceCount(&dev_count)); - CHECK_LE(comm_spec.local_num(), dev_count) - << "Only found " << dev_count << " GPUs, but " << comm_spec.local_num() - << " processes are launched"; - CHECK_CUDA(cudaSetDevice(dev_id)); - fragment = LoadGraph(efile, vfile, comm_spec, graph_spec); - - auto app = std::make_shared>(); - DoQuery, Args...>(fragment, app, comm_spec, dev_id, - out_prefix, args...); - } + graph_spec.partitioner_type = + grape::parse_partitioner_type_name(FLAGS_partitioner_type); + graph_spec.idxer_type = grape::parse_idxer_type_name(FLAGS_idxer_type); + + using FRAG_T = grape::cuda::HostFragment; + std::shared_ptr fragment; + int dev_id = comm_spec.local_id(); + int dev_count; + + CHECK_CUDA(cudaGetDeviceCount(&dev_count)); + CHECK_LE(comm_spec.local_num(), dev_count) + << "Only found " << dev_count << " GPUs, but " << comm_spec.local_num() + << " processes are launched"; + CHECK_CUDA(cudaSetDevice(dev_id)); + fragment = LoadGraph(efile, vfile, comm_spec, graph_spec); + + auto app = std::make_shared>(); + DoQuery, Args...>(fragment, app, comm_spec, dev_id, + out_prefix, args...); } template diff --git a/examples/gnn_sampler/append_only_edgecut_fragment.h b/examples/gnn_sampler/append_only_edgecut_fragment.h index 3a602db2..e3543114 100644 --- a/examples/gnn_sampler/append_only_edgecut_fragment.h +++ b/examples/gnn_sampler/append_only_edgecut_fragment.h @@ -26,10 +26,8 @@ limitations under the License. #include #include -#include #include #include -#include #include #include #include @@ -41,7 +39,7 @@ limitations under the License. 
#include #include #include -#include +#include #include #include "flat_hash_map/flat_hash_map.hpp" @@ -276,7 +274,6 @@ struct AppendOnlyEdgecutFragmentTraits { using sub_vertices_t = VertexVector; using fragment_adj_list_t = AdjList; using fragment_const_adj_list_t = ConstAdjList; - using vertex_map_t = GlobalVertexMap; using mirror_vertices_t = std::vector>; }; @@ -301,7 +298,7 @@ class AppendOnlyEdgecutFragment using oid_t = OID_T; using vdata_t = VDATA_T; using edata_t = EDATA_T; - using vertex_map_t = typename traits_t::vertex_map_t; + using vertex_map_t = VertexMap; using nbr_space_iter_impl = NbrSpaceIterImpl; using nbr_mapspace_iter_impl = NbrMapSpaceIterImpl; @@ -326,8 +323,8 @@ class AppendOnlyEdgecutFragment /** Constructor. * @param vm_ptr the vertex map. */ - explicit AppendOnlyEdgecutFragment(std::shared_ptr vm_ptr) - : FragmentBase(vm_ptr) {} + AppendOnlyEdgecutFragment() + : FragmentBase() {} virtual ~AppendOnlyEdgecutFragment() {} @@ -336,9 +333,11 @@ class AppendOnlyEdgecutFragment using base_t::InnerVertexGid2Lid; using base_t::IsInnerVertexGid; static std::string type_info() { return ""; } - void Init(fid_t fid, bool directed, std::vector& vertices, + void Init(const CommSpec& comm_spec, bool directed, + std::unique_ptr>&& vm_ptr, + std::vector& vertices, std::vector& edges) override { - init(fid, directed); + init(comm_spec.fid(), directed, std::move(vm_ptr)); ovnum_ = 0; oenum_ = 0; @@ -376,12 +375,12 @@ class AppendOnlyEdgecutFragment } tvnum_ = ivnum_ + ovnum_; max_old_ilid_ = ivnum_; - min_old_olid_ = id_parser_.max_local_id() - ovnum_; + min_old_olid_ = id_parser_.max_local_id() - ovnum_ + 1; this->inner_vertices_.SetRange(0, ivnum_); - this->outer_vertices_.SetRange(id_parser_.max_local_id() - ovnum_, - id_parser_.max_local_id()); - this->vertices_.SetRange(0, ivnum_, id_parser_.max_local_id() - ovnum_, - id_parser_.max_local_id()); + this->outer_vertices_.SetRange(id_parser_.max_local_id() - ovnum_ + 1, + id_parser_.max_local_id() + 1); + 
this->vertices_.SetRange(0, ivnum_, id_parser_.max_local_id() - ovnum_ + 1, + id_parser_.max_local_id() + 1); { std::vector odegree(ivnum_, 0); @@ -500,11 +499,13 @@ class AppendOnlyEdgecutFragment std::vector edges; edges.reserve(edge_messages.size()); std::vector empty_id_list; - auto& partitioner = vm_ptr_->GetPartitioner(); { edata_t e_data; oid_t src, dst, src_gid, dst_gid; fid_t src_fid, dst_fid; + std::vector> edge_list; + edge_list.reserve(edge_messages.size()); + std::vector local_vertices_to_add; auto line_parser_ptr = std::make_shared>(); for (auto& msg : edge_messages) { @@ -517,12 +518,26 @@ class AppendOnlyEdgecutFragment LOG(ERROR) << e.what(); continue; } - src_fid = partitioner.GetPartitionId(src); - dst_fid = partitioner.GetPartitionId(dst); - vm_ptr_->AddVertex(src, src_gid); - vm_ptr_->AddVertex(dst, dst_gid); - if (src_fid == fid_ || dst_fid == fid_) { - edges.emplace_back(src_gid, dst_gid, e_data); + src_fid = vm_ptr_->GetFragmentId(src); + dst_fid = vm_ptr_->GetFragmentId(dst); + if (src_fid == fid_) { + if (!vm_ptr_->GetGid(src, src_gid)) { + local_vertices_to_add.push_back(src); + } + edge_list.emplace_back(src, dst, e_data); + } else if (dst_fid == fid_) { + if (!vm_ptr_->GetGid(dst, dst_gid)) { + local_vertices_to_add.push_back(dst); + } + edge_list.emplace_back(src, dst, e_data); + } + } + + vm_ptr_->ExtendVertices(comm_spec, std::move(local_vertices_to_add)); + for (auto& e : edge_list) { + if (vm_ptr_->GetGid(e.src, src_gid) && + vm_ptr_->GetGid(e.dst, dst_gid)) { + edges.emplace_back(src_gid, dst_gid, e.edata); } } } @@ -573,10 +588,11 @@ class AppendOnlyEdgecutFragment } } this->inner_vertices_.SetRange(0, ivnum_); - this->outer_vertices_.SetRange(id_parser_.max_local_id() - ovnum_, - id_parser_.max_local_id()); - this->vertices_.SetRange(0, ivnum_, id_parser_.max_local_id() - ovnum_, - id_parser_.max_local_id()); + this->outer_vertices_.SetRange(id_parser_.max_local_id() - ovnum_ + 1, + id_parser_.max_local_id() + 1); + 
this->vertices_.SetRange(0, ivnum_, + id_parser_.max_local_id() - ovnum_ + 1, + id_parser_.max_local_id() + 1); tvnum_ = ivnum_ + ovnum_; ovgid_.resize(ovnum_); memcpy(&ovgid_[old_ovnum], &ov_to_extend[0], @@ -605,7 +621,7 @@ class AppendOnlyEdgecutFragment InArchive ia; vid_t xivnum = max_old_ilid_; - vid_t xovnum = id_parser_.max_local_id() - min_old_olid_; + vid_t xovnum = id_parser_.max_local_id() - min_old_olid_ + 1; ia << xivnum << xovnum << oenum_; io_adaptor->WriteArchive(ia); @@ -646,10 +662,13 @@ class AppendOnlyEdgecutFragment } template - void Deserialize(const std::string prefix, const fid_t fid) { + void Deserialize(const CommSpec& comm_spec, + std::unique_ptr>&& vm_ptr, + const std::string prefix) { + vm_ptr_ = std::move(vm_ptr); char fbuf[1024]; snprintf(fbuf, sizeof(fbuf), kSerializationFilenameFormat, prefix.c_str(), - fid); + comm_spec.fid()); VLOG(1) << "Deserialize from " << fbuf; auto io_adaptor = std::unique_ptr(new IOADAPTOR_T(std::string(fbuf))); @@ -700,15 +719,15 @@ class AppendOnlyEdgecutFragment io_adaptor->Close(); max_old_ilid_ = ivnum_; - min_old_olid_ = id_parser_.max_local_id() - ovnum_; + min_old_olid_ = id_parser_.max_local_id() - ovnum_ + 1; extra_oenum_ = 0; extra_oe_.clear(); extra_oe_.resize(ivnum_, -1); this->inner_vertices_.SetRange(0, ivnum_); - this->outer_vertices_.SetRange(id_parser_.max_local_id() - ovnum_, - id_parser_.max_local_id()); - this->vertices_.SetRange(0, ivnum_, id_parser_.max_local_id() - ovnum_, - id_parser_.max_local_id()); + this->outer_vertices_.SetRange(id_parser_.max_local_id() - ovnum_ + 1, + id_parser_.max_local_id() + 1); + this->vertices_.SetRange(0, ivnum_, id_parser_.max_local_id() - ovnum_ + 1, + id_parser_.max_local_id() + 1); initOuterVerticesOfFragment(); } diff --git a/examples/gnn_sampler/run_sampler.cc b/examples/gnn_sampler/run_sampler.cc index 6e5a44ea..fa3bb02c 100644 --- a/examples/gnn_sampler/run_sampler.cc +++ b/examples/gnn_sampler/run_sampler.cc @@ -24,7 +24,6 @@ limitations 
under the License. #include #include -#include #include #include "append_only_edgecut_fragment.h" diff --git a/grape/app/mutation_context.h b/grape/app/mutation_context.h index 783b58f6..0692633f 100644 --- a/grape/app/mutation_context.h +++ b/grape/app/mutation_context.h @@ -66,20 +66,29 @@ class MutationContext : public ContextBase { void add_vertex(const oid_t& id, const vdata_t& data) { fid_t fid = partitioner_.GetPartitionId(id); - id_to_add_[fid].push_back(id); - vdata_to_add_[fid].push_back(data); + if (fid == fragment_.fnum()) { + LOG(ERROR) << "add vertex - " << id << " failed, unknwon partition id"; + } else { + id_to_add_[fid].push_back(id); + vdata_to_add_[fid].push_back(data); + } } void add_edge(const oid_t& src, const oid_t& dst, const edata_t& data) { fid_t src_fid = partitioner_.GetPartitionId(src); fid_t dst_fid = partitioner_.GetPartitionId(dst); - esrc_to_add_[src_fid].push_back(src); - edst_to_add_[src_fid].push_back(dst); - edata_to_add_[src_fid].push_back(data); - if (src_fid != dst_fid) { - esrc_to_add_[dst_fid].push_back(src); - edst_to_add_[dst_fid].push_back(dst); - edata_to_add_[dst_fid].push_back(data); + if (src_fid == fragment_.fnum() || dst_fid == fragment_.fnum()) { + LOG(ERROR) << "add edge - " << src << " -> " << dst + << " failed, unknwon partition id"; + } else { + esrc_to_add_[src_fid].push_back(src); + edst_to_add_[src_fid].push_back(dst); + edata_to_add_[src_fid].push_back(data); + if (src_fid != dst_fid) { + esrc_to_add_[dst_fid].push_back(src); + edst_to_add_[dst_fid].push_back(dst); + edata_to_add_[dst_fid].push_back(data); + } } } @@ -95,8 +104,13 @@ class MutationContext : public ContextBase { parsed_vertices_to_update_.emplace_back(gid, data); } else { fid_t fid = partitioner_.GetPartitionId(id); - id_to_update_[fid].push_back(id); - vdata_to_update_[fid].push_back(data); + if (fid == fragment_.fnum()) { + LOG(ERROR) << "update vertex - " << id + << " failed, unknwon partition id"; + } else { + 
id_to_update_[fid].push_back(id); + vdata_to_update_[fid].push_back(data); + } } } @@ -107,13 +121,18 @@ class MutationContext : public ContextBase { void update_edge(const oid_t& src, const oid_t& dst, const edata_t& data) { fid_t src_fid = partitioner_.GetPartitionId(src); fid_t dst_fid = partitioner_.GetPartitionId(dst); - esrc_to_update_[src_fid].push_back(src); - edst_to_update_[src_fid].push_back(dst); - edata_to_update_[src_fid].push_back(data); - if (src_fid != dst_fid) { - esrc_to_update_[dst_fid].push_back(src); - edst_to_update_[dst_fid].push_back(dst); - edata_to_update_[dst_fid].push_back(data); + if (src_fid == fragment_.fnum() || dst_fid == fragment_.fnum()) { + LOG(ERROR) << "update edge - " << src << " -> " << dst + << " failed, unknwon partition id"; + } else { + esrc_to_update_[src_fid].push_back(src); + edst_to_update_[src_fid].push_back(dst); + edata_to_update_[src_fid].push_back(data); + if (src_fid != dst_fid) { + esrc_to_update_[dst_fid].push_back(src); + edst_to_update_[dst_fid].push_back(dst); + edata_to_update_[dst_fid].push_back(data); + } } } @@ -130,7 +149,12 @@ class MutationContext : public ContextBase { parsed_vid_to_remove_.push_back(gid); } else { fid_t fid = partitioner_.GetPartitionId(id); - id_to_remove_[fid].push_back(id); + if (fid == fragment_.fnum()) { + LOG(ERROR) << "remove vertex - " << id + << " failed, unknwon partition id"; + } else { + id_to_remove_[fid].push_back(id); + } } } @@ -141,11 +165,16 @@ class MutationContext : public ContextBase { void remove_edge(const oid_t& src, const oid_t& dst) { fid_t src_fid = partitioner_.GetPartitionId(src); fid_t dst_fid = partitioner_.GetPartitionId(dst); - esrc_to_remove_[src_fid].push_back(src); - edst_to_remove_[src_fid].push_back(dst); - if (src_fid != dst_fid) { - esrc_to_remove_[dst_fid].push_back(src); - edst_to_remove_[dst_fid].push_back(dst); + if (src_fid == fragment_.fnum() || dst_fid == fragment_.fnum()) { + LOG(ERROR) << "remove edge - " << src << " -> " << dst + 
<< " failed, unknwon partition id"; + } else { + esrc_to_remove_[src_fid].push_back(src); + edst_to_remove_[src_fid].push_back(dst); + if (src_fid != dst_fid) { + esrc_to_remove_[dst_fid].push_back(src); + edst_to_remove_[dst_fid].push_back(dst); + } } } diff --git a/grape/communication/sync_comm.h b/grape/communication/sync_comm.h index 2cf356dc..2417fb29 100644 --- a/grape/communication/sync_comm.h +++ b/grape/communication/sync_comm.h @@ -391,6 +391,68 @@ struct CommImpl, } }; +template +struct CommImpl, + typename std::enable_if::value>::type> { + static void send(const Array& vec, int dst_worker_id, int tag, + MPI_Comm comm) { + int64_t len = vec.size(); + CommImpl::send(len, dst_worker_id, tag, comm); + if (len > 0) { + send_buffer(vec.data(), vec.size(), dst_worker_id, tag, comm); + } + } + + static void send_partial(const Array& vec, size_t from, size_t to, + int dst_worker_id, int tag, MPI_Comm comm) { + int64_t len = to - from; + CommImpl::send(len, dst_worker_id, tag, comm); + if (len > 0) { + send_buffer(vec.data() + from, len, dst_worker_id, tag, comm); + } + } + + static void recv(Array& vec, int src_worker_id, int tag, + MPI_Comm comm) { + int64_t len; + CommImpl::recv(len, src_worker_id, tag, comm); + vec.resize(len); + if (len > 0) { + recv_buffer(vec.data(), vec.size(), src_worker_id, tag, comm); + } + } + + static void recv_at(Array& vec, size_t offset, int src_worker_id, + int tag, MPI_Comm comm) { + int64_t len; + CommImpl::recv(len, src_worker_id, tag, comm); + if (offset + len > vec.size()) { + vec.resize(offset + len); + } + if (len > 0) { + recv_buffer(vec.data() + offset, len, src_worker_id, tag, comm); + } + } + + template + static void multiple_send(const Array& vec, + const ITER_T& worker_id_begin, + const ITER_T& worker_id_end, int tag, + MPI_Comm comm) { + for (ITER_T iter = worker_id_begin; iter != worker_id_end; ++iter) { + int dst_worker_id = *iter; + send(vec, dst_worker_id, tag, comm); + } + } + + static void bcast(Array& vec, 
int root, MPI_Comm comm) { + int64_t len = vec.size(); + bcast_small_buffer(&len, 1, root, comm); + vec.resize(len); + bcast_buffer(vec.data(), len, root, comm); + } +}; + template <> struct CommImpl { static void send(const InArchive& arc, int dst_worker_id, int tag, @@ -751,7 +813,7 @@ typename std::enable_if::value>::type FlatAllGather( global.data(), counts.data(), displs.data(), MPI_CHAR, comm); } else { std::vector reqs; - std::vector offsets; + std::vector offsets(worker_num); int64_t sum = 0; for (int i = 0; i < worker_num; ++i) { offsets[i] = sum; diff --git a/grape/cuda/fragment/device_fragment.h b/grape/cuda/fragment/device_fragment.h index 48765d8c..4c7d42bb 100644 --- a/grape/cuda/fragment/device_fragment.h +++ b/grape/cuda/fragment/device_fragment.h @@ -27,7 +27,7 @@ limitations under the License. namespace grape { namespace cuda { template + grape::LoadStrategy _load_strategy> class HostFragment; namespace dev { @@ -445,8 +445,7 @@ class DeviceFragment { ArrayView> mirrors_of_frag_; template + typename _EDATA_T, grape::LoadStrategy __load_strategy> friend class grape::cuda::HostFragment; }; diff --git a/grape/cuda/fragment/host_fragment.h b/grape/cuda/fragment/host_fragment.h index 0127d838..dbb4ec8b 100644 --- a/grape/cuda/fragment/host_fragment.h +++ b/grape/cuda/fragment/host_fragment.h @@ -43,7 +43,6 @@ limitations under the License. 
#include "grape/types.h" #include "grape/util.h" #include "grape/utils/vertex_array.h" -#include "grape/vertex_map/global_vertex_map.h" namespace grape { namespace cuda { @@ -65,14 +64,12 @@ inline void CalculateOffsetWithPrefixSum(const Stream& stream, } template > -class HostFragment - : public ImmutableEdgecutFragment { + grape::LoadStrategy _load_strategy = grape::LoadStrategy::kOnlyOut> +class HostFragment : public ImmutableEdgecutFragment { public: - using base_t = ImmutableEdgecutFragment; + using base_t = + ImmutableEdgecutFragment; using internal_vertex_t = typename base_t::internal_vertex_t; using edge_t = typename base_t::edge_t; using nbr_t = typename base_t::nbr_t; @@ -86,8 +83,7 @@ class HostFragment using edata_t = EDATA_T; using vertex_range_t = typename base_t::vertex_range_t; - using vertex_map_t = typename base_t::vertex_map_t; - using dev_vertex_map_t = cuda::DeviceVertexMap; + using dev_vertex_map_t = cuda::DeviceVertexMap>; using inner_vertices_t = typename base_t::inner_vertices_t; using outer_vertices_t = typename base_t::outer_vertices_t; using device_t = @@ -99,15 +95,14 @@ class HostFragment static constexpr grape::LoadStrategy load_strategy = _load_strategy; - HostFragment() = default; + HostFragment() : FragmentBase() {} - explicit HostFragment(std::shared_ptr vm_ptr) - : FragmentBase(vm_ptr) {} - - void Init(fid_t fid, bool directed, std::vector& vertices, + void Init(const CommSpec& comm_spec, bool directed, + std::unique_ptr>&& vm_ptr, + std::vector& vertices, std::vector& edges) { - base_t::Init(fid, directed, vertices, edges); - __allocate_device_fragment__(); + base_t::Init(comm_spec, directed, std::move(vm_ptr), vertices, edges); + __allocate_device_fragment__(comm_spec.local_id()); } template @@ -116,9 +111,12 @@ class HostFragment } template - void Deserialize(const std::string& prefix, const fid_t fid) { - base_t::template Deserialize(prefix, fid); - __allocate_device_fragment__(); + void Deserialize(const CommSpec& comm_spec, 
+ std::unique_ptr>&& vm_ptr, + const std::string& prefix) { + base_t::template Deserialize(comm_spec, std::move(vm_ptr), + prefix); + __allocate_device_fragment__(comm_spec.local_id()); } void PrepareToRunApp(const CommSpec& comm_spec, PrepareConf conf) { @@ -135,7 +133,6 @@ class HostFragment } if (conf.need_split_edges || conf.need_split_edges_by_fragment) { - auto& comm_spec = vm_ptr_->GetCommSpec(); auto& ie = ie_.get_edges(); auto& ieoffset = ie_.get_offsets(); auto& oe = oe_.get_edges(); @@ -161,7 +158,7 @@ class HostFragment stream.cuda_stream())); auto prefix_sum = compute_prefix_sum(ieoffset); - ArrayView d_prefix_sum(prefix_sum); + ArrayView d_prefix_sum(prefix_sum.data(), prefix_sum.size()); CalculateOffsetWithPrefixSum( stream, d_prefix_sum, thrust::raw_pointer_cast(d_ie_.data()), @@ -176,7 +173,7 @@ class HostFragment stream.cuda_stream())); auto prefix_sum = compute_prefix_sum(oeoffset); - ArrayView d_prefix_sum(prefix_sum); + ArrayView d_prefix_sum(prefix_sum.data(), prefix_sum.size()); CalculateOffsetWithPrefixSum( stream, d_prefix_sum, thrust::raw_pointer_cast(d_oe_.data()), @@ -211,7 +208,7 @@ class HostFragment } if (conf.need_build_device_vm) { - d_vm_ptr_->Init(stream); + d_vm_ptr_->Init(stream, comm_spec, vm_ptr_); } stream.Sync(); } @@ -321,18 +318,17 @@ class HostFragment return dev_frag; } - void __allocate_device_fragment__() { - auto& comm_spec = vm_ptr_->GetCommSpec(); + void __allocate_device_fragment__(int local_id) { auto& ie = ie_.get_edges(); auto& ieoffset = ie_.get_offsets(); auto& oe = oe_.get_edges(); auto& oeoffset = oe_.get_offsets(); - int dev_id = comm_spec.local_id(); + int dev_id = local_id; CHECK_CUDA(cudaSetDevice(dev_id)); Stream stream; - d_vm_ptr_ = std::make_shared(vm_ptr_); + d_vm_ptr_ = std::make_shared(); auto offset_size = ivnum_ + ovnum_ + 1; auto compute_prefix_sum = [offset_size]( @@ -354,7 +350,7 @@ class HostFragment cudaMemcpyHostToDevice, stream.cuda_stream())); auto prefix_sum = 
compute_prefix_sum(ieoffset); - ArrayView d_prefix_sum(prefix_sum); + ArrayView d_prefix_sum(prefix_sum.data(), prefix_sum.size()); CalculateOffsetWithPrefixSum( stream, d_prefix_sum, thrust::raw_pointer_cast(d_ie_.data()), @@ -370,7 +366,7 @@ class HostFragment cudaMemcpyHostToDevice, stream.cuda_stream())); auto prefix_sum = compute_prefix_sum(oeoffset); - ArrayView d_prefix_sum(prefix_sum); + ArrayView d_prefix_sum(prefix_sum.data(), prefix_sum.size()); CalculateOffsetWithPrefixSum( stream, d_prefix_sum, thrust::raw_pointer_cast(d_oe_.data()), @@ -414,7 +410,6 @@ class HostFragment [] __device__(VID_T * gids, VID_T * lids, VID_T size, CUDASTL::HashMap * ovg2l) { auto tid = TID_1D; - gids = thrust::raw_pointer_cast(gids); auto nthreads = TOTAL_THREADS_1D; for (VID_T idx = 0 + tid; idx < size; idx += nthreads) { @@ -424,8 +419,7 @@ class HostFragment (*ovg2l)[gid] = lid; } }, - thrust::raw_pointer_cast(gids.data()), - thrust::raw_pointer_cast(lids.data()), size, d_ovg2l_.get()); + gids.data(), lids.data(), size, d_ovg2l_.get()); } d_mirrors_of_frag_holder_.resize(fnum_); @@ -635,7 +629,7 @@ class HostFragment thrust::device_vector& d_fid_list, thrust::device_vector& d_fid_list_offset) { pinned_vector prefix_sum(ivnum_ + 1, 0); - ArrayView d_prefix_sum(prefix_sum); + ArrayView d_prefix_sum(prefix_sum.data(), prefix_sum.size()); for (VID_T i = 0; i < ivnum_; ++i) { prefix_sum[i + 1] = diff --git a/grape/cuda/vertex_map/device_vertex_map.h b/grape/cuda/vertex_map/device_vertex_map.h index dbb7e03d..c71d4767 100644 --- a/grape/cuda/vertex_map/device_vertex_map.h +++ b/grape/cuda/vertex_map/device_vertex_map.h @@ -23,7 +23,6 @@ limitations under the License. 
#include "grape/cuda/utils/launcher.h" #include "grape/cuda/utils/stream.h" #include "grape/fragment/id_parser.h" -#include "grape/vertex_map/global_vertex_map.h" namespace grape { namespace cuda { @@ -98,23 +97,22 @@ class DeviceVertexMap { using VID_T = typename HOST_VM_T::vid_t; public: - explicit DeviceVertexMap(std::shared_ptr vm_ptr) - : vm_ptr_(vm_ptr) {} + DeviceVertexMap() {} - void Init(const Stream& stream) { - auto& comm_spec = vm_ptr_->GetCommSpec(); - fid_t fnum = comm_spec.fnum(); + void Init(const Stream& stream, const CommSpec& comm_spec, + std::unique_ptr& vm_ptr) { + fnum_ = comm_spec.fnum(); int dev_id = comm_spec.local_id(); CHECK_CUDA(cudaSetDevice(dev_id)); - id_parser_.init(fnum); - d_o2l_.resize(fnum); - d_l2o_.resize(fnum); - d_l2o_ptr_.resize(fnum); + id_parser_.init(fnum_); + d_o2l_.resize(fnum_); + d_l2o_.resize(fnum_); + d_l2o_ptr_.resize(fnum_); - for (fid_t fid = 0; fid < fnum; fid++) { - auto ivnum = vm_ptr_->GetInnerVertexSize(fid); + for (fid_t fid = 0; fid < fnum_; fid++) { + auto ivnum = vm_ptr->GetInnerVertexSize(fid); // TODO(liang): replace this d_o2l_[fid] = CUDASTL::CreateHashMap>( @@ -124,7 +122,7 @@ class DeviceVertexMap { for (size_t lid = 0; lid < ivnum; lid++) { OID_T oid; - CHECK(vm_ptr_->GetOid(fid, lid, oid)); + CHECK(vm_ptr->GetOid(fid, lid, oid)); oids[lid] = oid; } @@ -141,17 +139,16 @@ class DeviceVertexMap { (*o2l)[oid] = lid; } }, - thrust::raw_pointer_cast(oids.data()), ivnum, d_o2l_[fid]); + oids.data(), ivnum, d_o2l_[fid]); d_l2o_[fid].assign(oids.begin(), oids.end()); d_l2o_ptr_[fid] = ArrayView(d_l2o_[fid]); } } dev::DeviceVertexMap DeviceObject() { - auto& comm_spec = vm_ptr_->GetCommSpec(); dev::DeviceVertexMap dev_vm; - dev_vm.fnum_ = comm_spec.fnum(); + dev_vm.fnum_ = fnum_; dev_vm.id_parser_ = id_parser_; // if device vm is built @@ -163,7 +160,6 @@ class DeviceVertexMap { } private: - std::shared_ptr vm_ptr_; IdParser id_parser_; // l2o for per device thrust::device_vector< @@ -171,6 +167,8 @@ class 
DeviceVertexMap { d_o2l_; std::vector> d_l2o_; thrust::device_vector> d_l2o_ptr_; + + fid_t fnum_; }; } // namespace cuda diff --git a/grape/fragment/basic_efile_fragment_loader.h b/grape/fragment/basic_efile_fragment_loader.h new file mode 100644 index 00000000..8f26088f --- /dev/null +++ b/grape/fragment/basic_efile_fragment_loader.h @@ -0,0 +1,185 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +#ifndef GRAPE_FRAGMENT_BASIC_EFILE_FRAGMENT_LOADER_H_ +#define GRAPE_FRAGMENT_BASIC_EFILE_FRAGMENT_LOADER_H_ + +#include "grape/communication/shuffle.h" +#include "grape/fragment/basic_fragment_loader_base.h" +#include "grape/fragment/rebalancer.h" +#include "grape/graph/edge.h" +#include "grape/graph/vertex.h" +#include "grape/vertex_map/vertex_map.h" + +namespace grape { + +template +class BasicEFileFragmentLoader : public BasicFragmentLoaderBase { + using fragment_t = FRAG_T; + using oid_t = typename fragment_t::oid_t; + using internal_oid_t = typename InternalOID::type; + using vid_t = typename fragment_t::vid_t; + using vdata_t = typename fragment_t::vdata_t; + using edata_t = typename fragment_t::edata_t; + + public: + explicit BasicEFileFragmentLoader(const CommSpec& comm_spec, + const LoadGraphSpec& spec) + : BasicFragmentLoaderBase(comm_spec, spec) { + if (spec_.partitioner_type != PartitionerType::kHashPartitioner) { + LOG(ERROR) << "Only hash partitioner is supported in " + "BasicEFileFragmentLoader"; + 
spec_.partitioner_type = PartitionerType::kHashPartitioner; + } + if (spec_.rebalance) { + LOG(ERROR) << "Rebalance is not supported in BasicEFileFragmentLoader"; + spec_.rebalance = false; + } + partitioner_ = std::unique_ptr>( + new HashPartitioner(comm_spec_.fnum())); + edges_to_frag_.resize(comm_spec_.fnum()); + for (fid_t fid = 0; fid < comm_spec_.fnum(); ++fid) { + int worker_id = comm_spec_.FragToWorker(fid); + edges_to_frag_[fid].Init(comm_spec_.comm(), edge_tag, 4096000); + edges_to_frag_[fid].SetDestination(worker_id, fid); + if (worker_id == comm_spec_.worker_id()) { + edges_to_frag_[fid].DisableComm(); + } + } + + edge_recv_thread_ = + std::thread(&BasicEFileFragmentLoader::edgeRecvRoutine, this); + recv_thread_running_ = true; + } + + ~BasicEFileFragmentLoader() { + if (recv_thread_running_) { + for (auto& ea : edges_to_frag_) { + ea.Flush(); + } + edge_recv_thread_.join(); + } + } + + void AddVertex(const oid_t& id, const vdata_t& data) override {} + + void ConstructVertices() override {} + + void AddEdge(const oid_t& src, const oid_t& dst, + const edata_t& data) override { + internal_oid_t internal_src(src); + internal_oid_t internal_dst(dst); + fid_t src_fid = partitioner_->GetPartitionId(internal_src); + fid_t dst_fid = partitioner_->GetPartitionId(internal_dst); + if (src_fid == comm_spec_.fnum() || dst_fid == comm_spec_.fnum()) { + LOG(ERROR) << "Unknown partition id for edge " << src << " -> " << dst; + } else { + edges_to_frag_[src_fid].Emplace(internal_src, internal_dst, data); + if (src_fid != dst_fid) { + edges_to_frag_[dst_fid].Emplace(internal_src, internal_dst, data); + } + } + } + + void ConstructFragment(std::shared_ptr& fragment) override { + for (auto& ea : edges_to_frag_) { + ea.Flush(); + } + + edge_recv_thread_.join(); + recv_thread_running_ = false; + + MPI_Barrier(comm_spec_.comm()); + got_edges_.emplace_back( + std::move(edges_to_frag_[comm_spec_.fid()].buffers())); + edges_to_frag_[comm_spec_.fid()].Clear(); + + 
std::unique_ptr> vm_ptr( + new VertexMap()); + { + VertexMapBuilder builder( + comm_spec_.fid(), comm_spec_.fnum(), std::move(partitioner_), + spec_.idxer_type); + for (auto& buffers : got_edges_) { + foreach_helper( + buffers, + [&builder](const internal_oid_t& src, const internal_oid_t& dst) { + builder.add_vertex(src); + builder.add_vertex(dst); + }, + make_index_sequence<2>{}); + } + builder.finish(comm_spec_, *vm_ptr); + } + + std::vector> processed_edges; + for (auto& buffers : got_edges_) { + foreach_rval(buffers, [&processed_edges, &vm_ptr](internal_oid_t&& src, + internal_oid_t&& dst, + edata_t&& data) { + vid_t src_gid, dst_gid; + if (vm_ptr->GetGid(oid_t(src), src_gid) && + vm_ptr->GetGid(oid_t(dst), dst_gid)) { + processed_edges.emplace_back(src_gid, dst_gid, std::move(data)); + } + }); + } + + fragment = std::make_shared(); + std::vector> fake_vertices; + fragment->Init(comm_spec_, spec_.directed, std::move(vm_ptr), fake_vertices, + processed_edges); + + this->InitOuterVertexData(fragment); + } + + private: + void edgeRecvRoutine() { + ShuffleIn data_in; + data_in.Init(comm_spec_.fnum(), comm_spec_.comm(), edge_tag); + fid_t dst_fid; + int src_worker_id; + while (!data_in.Finished()) { + src_worker_id = data_in.Recv(dst_fid); + if (src_worker_id == -1) { + break; + } + CHECK_EQ(dst_fid, comm_spec_.fid()); + got_edges_.emplace_back(std::move(data_in.buffers())); + data_in.Clear(); + } + } + + std::unique_ptr> partitioner_; + + std::vector> + edges_to_frag_; + + std::thread edge_recv_thread_; + bool recv_thread_running_; + + std::vector> + got_edges_; + + using BasicFragmentLoaderBase::comm_spec_; + using BasicFragmentLoaderBase::spec_; + using BasicFragmentLoaderBase::id_parser_; + + using BasicFragmentLoaderBase::edge_tag; +}; + +}; // namespace grape + +#endif // GRAPE_FRAGMENT_BASIC_EFILE_FRAGMENT_LOADER_H_ diff --git a/grape/fragment/basic_fragment_loader.h b/grape/fragment/basic_fragment_loader.h index 28c6a73b..f10ca31a 100644 --- 
a/grape/fragment/basic_fragment_loader.h +++ b/grape/fragment/basic_fragment_loader.h @@ -16,71 +16,17 @@ limitations under the License. #ifndef GRAPE_FRAGMENT_BASIC_FRAGMENT_LOADER_H_ #define GRAPE_FRAGMENT_BASIC_FRAGMENT_LOADER_H_ -#include - -#include -#include -#include -#include -#include -#include -#include - #include "grape/communication/shuffle.h" -#include "grape/config.h" +#include "grape/fragment/basic_fragment_loader_base.h" +#include "grape/fragment/rebalancer.h" #include "grape/graph/edge.h" #include "grape/graph/vertex.h" -#include "grape/util.h" -#include "grape/utils/concurrent_queue.h" -#include "grape/utils/vertex_array.h" -#include "grape/worker/comm_spec.h" +#include "grape/vertex_map/vertex_map.h" namespace grape { -/** - * @brief LoadGraphSpec determines the specification to load a graph. - * - */ -struct LoadGraphSpec { - bool directed; - bool rebalance; - int rebalance_vertex_factor; - - bool serialize; - std::string serialization_prefix; - - bool deserialize; - std::string deserialization_prefix; - - void set_directed(bool val = true) { directed = val; } - void set_rebalance(bool flag, int weight) { - rebalance = flag; - rebalance_vertex_factor = weight; - } - - void set_serialize(bool flag, const std::string& prefix) { - serialize = flag; - serialization_prefix = prefix; - } - - void set_deserialize(bool flag, const std::string& prefix) { - deserialize = flag; - deserialization_prefix = prefix; - } -}; - -inline LoadGraphSpec DefaultLoadGraphSpec() { - LoadGraphSpec spec; - spec.directed = true; - spec.rebalance = true; - spec.rebalance_vertex_factor = 0; - spec.serialize = false; - spec.deserialize = false; - return spec; -} - -template -class BasicFragmentLoader { +template +class BasicFragmentLoader : public BasicFragmentLoaderBase { using fragment_t = FRAG_T; using oid_t = typename fragment_t::oid_t; using internal_oid_t = typename InternalOID::type; @@ -88,216 +34,151 @@ class BasicFragmentLoader { using vdata_t = typename 
fragment_t::vdata_t; using edata_t = typename fragment_t::edata_t; - using vertex_map_t = typename fragment_t::vertex_map_t; - using partitioner_t = typename vertex_map_t::partitioner_t; - - static constexpr LoadStrategy load_strategy = fragment_t::load_strategy; - public: - explicit BasicFragmentLoader(const CommSpec& comm_spec) - : comm_spec_(comm_spec) { - comm_spec_.Dup(); - vm_ptr_ = std::make_shared(comm_spec_); - vertices_to_frag_.resize(comm_spec_.fnum()); - edges_to_frag_.resize(comm_spec_.fnum()); - for (fid_t fid = 0; fid < comm_spec_.fnum(); ++fid) { - int worker_id = comm_spec_.FragToWorker(fid); - vertices_to_frag_[fid].Init(comm_spec_.comm(), vertex_tag, 4096000); - vertices_to_frag_[fid].SetDestination(worker_id, fid); - edges_to_frag_[fid].Init(comm_spec_.comm(), edge_tag, 4096000); - edges_to_frag_[fid].SetDestination(worker_id, fid); - if (worker_id == comm_spec_.worker_id()) { - vertices_to_frag_[fid].DisableComm(); - edges_to_frag_[fid].DisableComm(); - } + explicit BasicFragmentLoader(const CommSpec& comm_spec, + const LoadGraphSpec& spec) + : BasicFragmentLoaderBase(comm_spec, spec) { + if (spec_.idxer_type == IdxerType::kLocalIdxer) { + LOG(ERROR) << "Global vertex map is required in BasicFragmentLoader"; + spec_.idxer_type = IdxerType::kHashMapIdxer; + } + if (spec_.rebalance) { + LOG(ERROR) << "Rebalance is not supported in BasicFragmentLoader"; + spec_.rebalance = false; } - recv_thread_running_ = false; } - ~BasicFragmentLoader() { Stop(); } - - void SetPartitioner(const partitioner_t& partitioner) { - vm_ptr_->SetPartitioner(partitioner); - } - - void SetPartitioner(partitioner_t&& partitioner) { - vm_ptr_->SetPartitioner(std::move(partitioner)); - } - - void Start() { - vertex_recv_thread_ = - std::thread(&BasicFragmentLoader::vertexRecvRoutine, this); - edge_recv_thread_ = - std::thread(&BasicFragmentLoader::edgeRecvRoutine, this); - recv_thread_running_ = true; - } - - void Stop() { + ~BasicFragmentLoader() { if 
(recv_thread_running_) { - for (auto& va : vertices_to_frag_) { - va.Flush(); - } for (auto& ea : edges_to_frag_) { ea.Flush(); } - vertex_recv_thread_.join(); edge_recv_thread_.join(); - recv_thread_running_ = false; } } - void AddVertex(const oid_t& id, const vdata_t& data) { - internal_oid_t internal_id(id); - auto& partitioner = vm_ptr_->GetPartitioner(); - fid_t fid = partitioner.GetPartitionId(internal_id); - vertices_to_frag_[fid].Emplace(internal_id, data); - } - - void AddEdge(const oid_t& src, const oid_t& dst, const edata_t& data) { - internal_oid_t internal_src(src); - internal_oid_t internal_dst(dst); - auto& partitioner = vm_ptr_->GetPartitioner(); - fid_t src_fid = partitioner.GetPartitionId(internal_src); - fid_t dst_fid = partitioner.GetPartitionId(internal_dst); - edges_to_frag_[src_fid].Emplace(internal_src, internal_dst, data); - if (src_fid != dst_fid) { - edges_to_frag_[dst_fid].Emplace(internal_src, internal_dst, data); - } + void AddVertex(const oid_t& id, const vdata_t& data) override { + vertices_.emplace_back(id); + vdata_.emplace_back(data); } - bool SerializeFragment(std::shared_ptr& fragment, - const std::string& serialization_prefix) { - std::string type_prefix = fragment_t::type_info(); - std::string typed_prefix = serialization_prefix + "/" + type_prefix; - char serial_file[1024]; - snprintf(serial_file, sizeof(serial_file), "%s/%s", typed_prefix.c_str(), - kSerializationVertexMapFilename); - vm_ptr_->template Serialize(typed_prefix); - fragment->template Serialize(typed_prefix); - - return true; - } + void ConstructVertices() override { + fid_t fid = comm_spec_.fid(); + fid_t fnum = comm_spec_.fnum(); + std::unique_ptr> partitioner(nullptr); + if (spec_.partitioner_type == PartitionerType::kHashPartitioner) { + partitioner = std::unique_ptr>( + new HashPartitioner(fnum)); + } else if (spec_.partitioner_type == PartitionerType::kMapPartitioner) { + std::vector all_vertices; + sync_comm::FlatAllGather(vertices_, all_vertices, 
comm_spec_.comm()); + DistinctSort(all_vertices); - bool existSerializationFile(const std::string& prefix) { - char vm_fbuf[1024], frag_fbuf[1024]; - snprintf(vm_fbuf, sizeof(vm_fbuf), "%s/%s", prefix.c_str(), - kSerializationVertexMapFilename); - snprintf(frag_fbuf, sizeof(frag_fbuf), kSerializationFilenameFormat, - prefix.c_str(), comm_spec_.fid()); - std::string vm_path = vm_fbuf; - std::string frag_path = frag_fbuf; - return exists_file(vm_path) && exists_file(frag_path); - } + partitioner = std::unique_ptr>( + new MapPartitioner(fnum, all_vertices)); + } else if (spec_.partitioner_type == + PartitionerType::kSegmentedPartitioner) { + std::vector all_vertices; + sync_comm::FlatAllGather(vertices_, all_vertices, comm_spec_.comm()); + DistinctSort(all_vertices); - bool DeserializeFragment(std::shared_ptr& fragment, - const std::string& deserialization_prefix) { - std::string type_prefix = fragment_t::type_info(); - std::string typed_prefix = deserialization_prefix + "/" + type_prefix; - if (!existSerializationFile(typed_prefix)) { - return false; - } - auto io_adaptor = - std::unique_ptr(new IOADAPTOR_T(typed_prefix)); - if (io_adaptor->IsExist()) { - vm_ptr_->template Deserialize(typed_prefix, - comm_spec_.fid()); - fragment = std::shared_ptr(new fragment_t(vm_ptr_)); - fragment->template Deserialize(typed_prefix, - comm_spec_.fid()); - return true; + partitioner = std::unique_ptr>( + new SegmentedPartitioner(fnum, all_vertices)); } else { - return false; + LOG(FATAL) << "Unsupported partitioner type"; + } + std::vector> local_vertices_id; + std::vector> local_vertices_data; + this->ShuffleVertexData(vertices_, vdata_, local_vertices_id, + local_vertices_data, *partitioner); + std::vector sorted_vertices; + for (auto& buf : local_vertices_id) { + sorted_vertices.insert(sorted_vertices.end(), buf.begin(), buf.end()); + } + std::sort(sorted_vertices.begin(), sorted_vertices.end()); + + VertexMapBuilder builder(fid, fnum, std::move(partitioner), + 
spec_.idxer_type); + for (auto& v : sorted_vertices) { + builder.add_vertex(v); + } + vertex_map_ = + std::unique_ptr>(new VertexMap()); + builder.finish(comm_spec_, *vertex_map_); + + for (size_t buf_i = 0; buf_i < local_vertices_id.size(); ++buf_i) { + std::vector& local_vertices = local_vertices_id[buf_i]; + std::vector& local_vdata = local_vertices_data[buf_i]; + size_t local_vertices_num = local_vertices.size(); + for (size_t i = 0; i < local_vertices_num; ++i) { + vid_t gid; + if (vertex_map_->GetGid(local_vertices[i], gid)) { + processed_vertices_.emplace_back(gid, std::move(local_vdata[i])); + } + } + } + + edges_to_frag_.resize(fnum); + for (fid_t fid = 0; fid < fnum; ++fid) { + int worker_id = comm_spec_.FragToWorker(fid); + edges_to_frag_[fid].Init(comm_spec_.comm(), edge_tag, 4096000); + edges_to_frag_[fid].SetDestination(worker_id, fid); + if (worker_id == comm_spec_.worker_id()) { + edges_to_frag_[fid].DisableComm(); + } } + edge_recv_thread_ = + std::thread(&BasicFragmentLoader::edgeRecvRoutine, this); + recv_thread_running_ = true; } - void ConstructFragment(std::shared_ptr& fragment, bool directed) { - for (auto& va : vertices_to_frag_) { - va.Flush(); + void AddEdge(const oid_t& src, const oid_t& dst, + const edata_t& data) override { + vid_t src_gid, dst_gid; + if (vertex_map_->GetGid(src, src_gid) && + vertex_map_->GetGid(dst, dst_gid)) { + fid_t src_fid = id_parser_.get_fragment_id(src_gid); + fid_t dst_fid = id_parser_.get_fragment_id(dst_gid); + edges_to_frag_[src_fid].Emplace(src_gid, dst_gid, data); + if (src_fid != dst_fid) { + edges_to_frag_[dst_fid].Emplace(src_gid, dst_gid, data); + } } + } + + void ConstructFragment(std::shared_ptr& fragment) override { for (auto& ea : edges_to_frag_) { ea.Flush(); } - vertex_recv_thread_.join(); + edge_recv_thread_.join(); recv_thread_running_ = false; MPI_Barrier(comm_spec_.comm()); - got_vertices_.emplace_back( - std::move(vertices_to_frag_[comm_spec_.fid()].buffers())); - 
vertices_to_frag_[comm_spec_.fid()].Clear(); got_edges_.emplace_back( std::move(edges_to_frag_[comm_spec_.fid()].buffers())); edges_to_frag_[comm_spec_.fid()].Clear(); - vm_ptr_->Init(); - auto builder = vm_ptr_->GetLocalBuilder(); - for (auto& buffers : got_vertices_) { - foreach_helper( - buffers, - [&builder](const internal_oid_t& id) { builder.add_vertex(id); }, - make_index_sequence<1>{}); - } - for (auto& buffers : got_edges_) { - foreach_helper( - buffers, - [&builder](const internal_oid_t& src, const internal_oid_t& dst) { - builder.add_vertex(src); - builder.add_vertex(dst); - }, - make_index_sequence<2>{}); - } - builder.finish(*vm_ptr_); - - processed_vertices_.clear(); - if (!std::is_same::value) { - for (auto& buffers : got_vertices_) { - foreach_rval(buffers, [this](internal_oid_t&& id, vdata_t&& data) { - vid_t gid; - CHECK(vm_ptr_->_GetGid(id, gid)); - processed_vertices_.emplace_back(gid, std::move(data)); - }); - } - } - got_vertices_.clear(); - + std::vector> processed_edges; for (auto& buffers : got_edges_) { - foreach_rval(buffers, [this](internal_oid_t&& src, internal_oid_t&& dst, - edata_t&& data) { - vid_t src_gid, dst_gid; - CHECK(vm_ptr_->_GetGid(src, src_gid)); - CHECK(vm_ptr_->_GetGid(dst, dst_gid)); - processed_edges_.emplace_back(src_gid, dst_gid, std::move(data)); + foreach_rval(buffers, [&processed_edges](vid_t&& src, vid_t&& dst, + edata_t&& data) { + processed_edges.emplace_back(src, dst, std::move(data)); }); } - fragment = std::shared_ptr(new fragment_t(vm_ptr_)); - fragment->Init(comm_spec_.fid(), directed, processed_vertices_, - processed_edges_); + fragment = std::make_shared(); + fragment->Init(comm_spec_, spec_.directed, std::move(vertex_map_), + processed_vertices_, processed_edges); - if (!std::is_same::value) { - initOuterVertexData(fragment); - } - } - - void vertexRecvRoutine() { - ShuffleIn data_in; - data_in.Init(comm_spec_.fnum(), comm_spec_.comm(), vertex_tag); - fid_t dst_fid; - int src_worker_id; - while 
(!data_in.Finished()) { - src_worker_id = data_in.Recv(dst_fid); - if (src_worker_id == -1) { - break; - } - got_vertices_.emplace_back(std::move(data_in.buffers())); - data_in.Clear(); - } + this->InitOuterVertexData(fragment); } + private: void edgeRecvRoutine() { - ShuffleIn data_in; + ShuffleIn data_in; data_in.Init(comm_spec_.fnum(), comm_spec_.comm(), edge_tag); fid_t dst_fid; int src_worker_id; @@ -306,75 +187,37 @@ class BasicFragmentLoader { if (src_worker_id == -1) { break; } - CHECK_EQ(dst_fid, comm_spec_.fid()); - got_edges_.emplace_back(std::move(data_in.buffers())); - data_in.Clear(); - } - } - - void initOuterVertexData(std::shared_ptr fragment) { - int worker_num = comm_spec_.worker_num(); - - std::vector> request_gid_lists(worker_num); - auto& outer_vertices = fragment->OuterVertices(); - for (auto& v : outer_vertices) { - fid_t fid = fragment->GetFragId(v); - request_gid_lists[comm_spec_.FragToWorker(fid)].emplace_back( - fragment->GetOuterVertexGid(v)); - } - std::vector> requested_gid_lists(worker_num); - sync_comm::AllToAll(request_gid_lists, requested_gid_lists, - comm_spec_.comm()); - std::vector> response_vdata_lists(worker_num); - for (int i = 0; i < worker_num; ++i) { - auto& id_vec = requested_gid_lists[i]; - auto& data_vec = response_vdata_lists[i]; - data_vec.reserve(id_vec.size()); - for (auto id : id_vec) { - typename fragment_t::vertex_t v; - CHECK(fragment->InnerVertexGid2Vertex(id, v)); - data_vec.emplace_back(fragment->GetData(v)); - } - } - std::vector> responsed_vdata_lists(worker_num); - sync_comm::AllToAll(response_vdata_lists, responsed_vdata_lists, - comm_spec_.comm()); - for (int i = 0; i < worker_num; ++i) { - auto& id_vec = request_gid_lists[i]; - auto& data_vec = responsed_vdata_lists[i]; - CHECK_EQ(id_vec.size(), data_vec.size()); - size_t num = id_vec.size(); - for (size_t k = 0; k < num; ++k) { - typename fragment_t::vertex_t v; - CHECK(fragment->OuterVertexGid2Vertex(id_vec[k], v)); - fragment->SetData(v, 
data_vec[k]); + if (dst_fid == comm_spec_.fid()) { + got_edges_.emplace_back(std::move(data_in.buffers())); + data_in.Clear(); } } } - private: - CommSpec comm_spec_; - std::shared_ptr vm_ptr_; + std::vector vertices_; + std::vector vdata_; + + std::vector> processed_vertices_; - std::vector> vertices_to_frag_; - std::vector> - edges_to_frag_; + std::unique_ptr> vertex_map_; - std::thread vertex_recv_thread_; + std::vector> edges_to_frag_; std::thread edge_recv_thread_; bool recv_thread_running_; - std::vector> got_vertices_; - std::vector> - got_edges_; + std::vector> got_edges_; - std::vector> processed_vertices_; - std::vector> processed_edges_; + std::vector src_gid_list_; + std::vector dst_gid_list_; + std::vector edata_; + + using BasicFragmentLoaderBase::comm_spec_; + using BasicFragmentLoaderBase::spec_; + using BasicFragmentLoaderBase::id_parser_; - static constexpr int vertex_tag = 5; - static constexpr int edge_tag = 6; + using BasicFragmentLoaderBase::edge_tag; }; -} // namespace grape +}; // namespace grape #endif // GRAPE_FRAGMENT_BASIC_FRAGMENT_LOADER_H_ diff --git a/grape/fragment/basic_fragment_loader_base.h b/grape/fragment/basic_fragment_loader_base.h new file mode 100644 index 00000000..44c1bdbb --- /dev/null +++ b/grape/fragment/basic_fragment_loader_base.h @@ -0,0 +1,440 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +#ifndef GRAPE_FRAGMENT_BASIC_FRAGMENT_LOADER_BASE_H_ +#define GRAPE_FRAGMENT_BASIC_FRAGMENT_LOADER_BASE_H_ + +#include "grape/util.h" +#include "grape/vertex_map/idxers/idxers.h" +#include "grape/vertex_map/partitioner.h" +#include "grape/vertex_map/vertex_map.h" + +namespace grape { + +/** + * @brief LoadGraphSpec determines the specification to load a graph. + * + */ +struct LoadGraphSpec { + bool directed; + bool rebalance; + int rebalance_vertex_factor; + + bool serialize; + std::string serialization_prefix; + + bool deserialize; + std::string deserialization_prefix; + + PartitionerType partitioner_type; + IdxerType idxer_type; + + void set_directed(bool val = true) { directed = val; } + void set_rebalance(bool flag, int weight) { + rebalance = flag; + rebalance_vertex_factor = weight; + } + + void set_serialize(bool flag, const std::string& prefix) { + serialize = flag; + serialization_prefix = prefix; + } + + void set_deserialize(bool flag, const std::string& prefix) { + deserialize = flag; + deserialization_prefix = prefix; + } + + std::string to_string() const { + std::string ret; + ret += (directed ? 
"directed-" : "undirected-"); + if (rebalance) { + ret += "rebalance-" + std::to_string(rebalance_vertex_factor) + "-"; + } else { + ret += "no-rebalance-"; + } + if (partitioner_type == PartitionerType::kHashPartitioner) { + ret += "hash-partitioner-"; + } else if (partitioner_type == PartitionerType::kMapPartitioner) { + ret += "map-partitioner-"; + } else if (partitioner_type == PartitionerType::kSegmentedPartitioner) { + ret += "segmented-partitioner-"; + } else { + LOG(FATAL) << "Unknown partitioner type"; + } + if (idxer_type == IdxerType::kHashMapIdxer) { + ret += "hashmap-idxer"; + } else if (idxer_type == IdxerType::kSortedArrayIdxer) { + ret += "sorted-array-idxer"; + } else if (idxer_type == IdxerType::kLocalIdxer) { + ret += "local-idxer"; + } else if (idxer_type == IdxerType::kPTHashIdxer) { + ret += "pthash-idxer"; + } else if (idxer_type == IdxerType::kHashMapIdxerView) { + ret += "hashmap-idxer-view"; + } else { + LOG(FATAL) << "Unknown idxer type"; + } + return ret; + } +}; + +inline LoadGraphSpec DefaultLoadGraphSpec() { + LoadGraphSpec spec; + spec.directed = true; + spec.rebalance = true; + spec.rebalance_vertex_factor = 0; + spec.serialize = false; + spec.deserialize = false; + spec.partitioner_type = PartitionerType::kHashPartitioner; + spec.idxer_type = IdxerType::kHashMapIdxer; + return spec; +} + +inline size_t hash_strings(const std::vector& strs) { + std::hash hash_fn; + size_t combinedHash = 0; + for (auto& str : strs) { + combinedHash ^= + hash_fn(str) + 0x9e3779b9 + (combinedHash << 6) + (combinedHash >> 2); + } + return combinedHash; +} + +inline std::string to_hex_string(size_t hash) { + std::stringstream ss; + ss << std::hex << std::uppercase << hash; + return ss.str(); +} + +template +std::string sigfile_content(const std::string& efile, const std::string& vfile, + const LoadGraphSpec& spec) { + std::string spec_info = spec.to_string(); + std::string frag_type_name = FRAG_T::type_info(); + std::string desc = "efile: " + efile + 
"\n"; + desc += "vfile: " + vfile + "\n"; + desc += "spec: " + spec_info + "\n"; + desc += "frag_type: " + frag_type_name + "\n"; + return desc; +} + +template +bool find_serialization(const std::string& efile, const std::string& vfile, + const std::string& serialization_prefix, + const LoadGraphSpec& spec, fid_t fnum, + std::string& prefix_out) { + std::string spec_info = spec.to_string(); + std::string frag_type_name = FRAG_T::type_info(); + size_t hash_value = hash_strings({efile, vfile, spec_info, frag_type_name}); + std::string desc = sigfile_content(efile, vfile, spec); + + while (true) { + std::string typed_prefix = serialization_prefix + "/" + + to_hex_string(hash_value) + "/" + "part_" + + std::to_string(fnum); + std::string sigfile_name = typed_prefix + "/sig"; + if (exists_file(sigfile_name)) { + std::string sigfile_content; + std::ifstream sigfile(sigfile_name); + if (!sigfile.is_open()) { + LOG(ERROR) << "Failed to open signature file: " << sigfile_name; + return false; + } + std::string line; + while (std::getline(sigfile, line)) { + sigfile_content += (line + "\n"); + } + if (sigfile_content == desc) { + prefix_out = typed_prefix; + return true; + } + } else { + prefix_out = typed_prefix; + return false; + } + ++hash_value; + } +} + +template +bool SerializeFragment(std::shared_ptr& fragment, + const CommSpec& comm_spec, const std::string& efile, + const std::string& vfile, const LoadGraphSpec& spec) { + std::string typed_prefix; + bool exist = + find_serialization(efile, vfile, spec.serialization_prefix, spec, + comm_spec.fnum(), typed_prefix); + if (exist) { + LOG(ERROR) << "Serialization exists: " << typed_prefix; + return false; + } + + if (!create_directories(typed_prefix)) { + LOG(ERROR) << "Failed to create directory: " << typed_prefix << ", " + << std::strerror(errno); + return false; + } + + char serial_file[1024]; + snprintf(serial_file, sizeof(serial_file), "%s/%s", typed_prefix.c_str(), + kSerializationVertexMapFilename); + 
fragment->GetVertexMap().template Serialize(typed_prefix, + comm_spec); + fragment->template Serialize(typed_prefix); + + MPI_Barrier(comm_spec.comm()); + if (comm_spec.worker_id() == 0) { + std::string sigfile_name = typed_prefix + "/sig"; + std::ofstream sigfile(sigfile_name); + if (!sigfile.is_open()) { + LOG(ERROR) << "Failed to open signature file: " << sigfile_name; + return false; + } + sigfile << sigfile_content(efile, vfile, spec); + } + + return true; +} + +template +bool DeserializeFragment(std::shared_ptr& fragment, + const CommSpec& comm_spec, const std::string& efile, + const std::string& vfile, const LoadGraphSpec& spec) { + std::string typed_prefix; + bool exist = + find_serialization(efile, vfile, spec.deserialization_prefix, + spec, comm_spec.fnum(), typed_prefix); + if (!exist) { + LOG(ERROR) << "Serialization not exists: " << typed_prefix; + return false; + } + + auto io_adaptor = std::unique_ptr(new IOADAPTOR_T(typed_prefix)); + if (io_adaptor->IsExist()) { + std::unique_ptr> + vm_ptr(new VertexMap()); + vm_ptr->template Deserialize(typed_prefix, comm_spec); + fragment = std::shared_ptr(new FRAG_T()); + fragment->template Deserialize(comm_spec, std::move(vm_ptr), + typed_prefix); + return true; + } else { + return false; + } +} + +template +class BasicFragmentLoaderBase { + using fragment_t = FRAG_T; + using oid_t = typename fragment_t::oid_t; + using vid_t = typename fragment_t::vid_t; + using vdata_t = typename fragment_t::vdata_t; + using edata_t = typename fragment_t::edata_t; + using internal_oid_t = typename InternalOID::type; + + public: + BasicFragmentLoaderBase(const CommSpec& comm_spec, const LoadGraphSpec& spec) + : comm_spec_(comm_spec), spec_(spec) { + comm_spec_.Dup(); + id_parser_.init(comm_spec_.fnum()); + } + virtual ~BasicFragmentLoaderBase() {} + + virtual void AddVertex(const oid_t& id, const vdata_t& data) = 0; + virtual void ConstructVertices() = 0; + virtual void AddEdge(const oid_t& src, const oid_t& dst, + const 
edata_t& data) = 0; + virtual void ConstructFragment(std::shared_ptr& fragment) = 0; + + protected: + void InitOuterVertexData(std::shared_ptr fragment) { + int worker_num = comm_spec_.worker_num(); + + std::vector> request_gid_lists(worker_num); + auto& outer_vertices = fragment->OuterVertices(); + for (auto& v : outer_vertices) { + fid_t fid = fragment->GetFragId(v); + request_gid_lists[comm_spec_.FragToWorker(fid)].emplace_back( + fragment->GetOuterVertexGid(v)); + } + std::vector> requested_gid_lists(worker_num); + sync_comm::AllToAll(request_gid_lists, requested_gid_lists, + comm_spec_.comm()); + std::vector> response_vdata_lists(worker_num); + for (int i = 0; i < worker_num; ++i) { + auto& id_vec = requested_gid_lists[i]; + auto& data_vec = response_vdata_lists[i]; + data_vec.reserve(id_vec.size()); + for (auto id : id_vec) { + typename fragment_t::vertex_t v; + CHECK(fragment->InnerVertexGid2Vertex(id, v)); + data_vec.emplace_back(fragment->GetData(v)); + } + } + std::vector> responsed_vdata_lists(worker_num); + sync_comm::AllToAll(response_vdata_lists, responsed_vdata_lists, + comm_spec_.comm()); + for (int i = 0; i < worker_num; ++i) { + auto& id_vec = request_gid_lists[i]; + auto& data_vec = responsed_vdata_lists[i]; + CHECK_EQ(id_vec.size(), data_vec.size()); + size_t num = id_vec.size(); + for (size_t k = 0; k < num; ++k) { + typename fragment_t::vertex_t v; + CHECK(fragment->OuterVertexGid2Vertex(id_vec[k], v)); + fragment->SetData(v, data_vec[k]); + } + } + } + + void ShuffleVertex(const std::vector& added_vertices_id, + std::vector>& local_vertices_id, + const IPartitioner& partitioner) { + fid_t fnum = comm_spec_.fnum(); + fid_t fid = comm_spec_.fid(); + std::vector> partitioned_vertices_out(fnum); + size_t added_vertices = added_vertices_id.size(); + for (size_t i = 0; i < added_vertices; ++i) { + fid_t dst_fid = partitioner.GetPartitionId(added_vertices_id[i]); + if (dst_fid == fnum) { + LOG(ERROR) << "Unknown partition id for vertex " + << 
added_vertices_id[i]; + } else { + partitioned_vertices_out[dst_fid].emplace_back( + std::move(added_vertices_id[i])); + } + } + + local_vertices_id.emplace_back(std::move(partitioned_vertices_out[fid])); + + std::thread send_thread([&]() { + int dst_worker_id = + (comm_spec_.worker_id() + 1) % comm_spec_.worker_num(); + while (dst_worker_id != comm_spec_.worker_id()) { + for (fid_t fid = 0; fid < fnum; ++fid) { + if (comm_spec_.FragToWorker(fid) != dst_worker_id) { + continue; + } + sync_comm::Send(partitioned_vertices_out[fid], dst_worker_id, + vertex_tag, comm_spec_.comm()); + } + dst_worker_id = (dst_worker_id + 1) % comm_spec_.worker_num(); + } + }); + std::thread recv_thread([&]() { + int src_worker_id = + (comm_spec_.worker_id() + comm_spec_.worker_num() - 1) % + comm_spec_.worker_num(); + while (src_worker_id != comm_spec_.worker_id()) { + for (fid_t fid = 0; fid < fnum; ++fid) { + if (comm_spec_.FragToWorker(fid) != comm_spec_.worker_id()) { + continue; + } + std::vector recv_vertices; + sync_comm::Recv(recv_vertices, src_worker_id, vertex_tag, + comm_spec_.comm()); + local_vertices_id.emplace_back(std::move(recv_vertices)); + } + src_worker_id = (src_worker_id + comm_spec_.worker_num() - 1) % + comm_spec_.worker_num(); + } + }); + + recv_thread.join(); + send_thread.join(); + } + + void ShuffleVertexData(const std::vector& added_vertices_id, + const std::vector& added_vertices_data, + std::vector>& local_vertices_id, + std::vector>& local_vertices_data, + const IPartitioner& partitioner) { + fid_t fnum = comm_spec_.fnum(); + fid_t fid = comm_spec_.fid(); + std::vector> partitioned_vertices_out(fnum); + std::vector> partitioned_vdata_out(fnum); + size_t added_vertices = added_vertices_id.size(); + for (size_t i = 0; i < added_vertices; ++i) { + fid_t dst_fid = partitioner.GetPartitionId(added_vertices_id[i]); + if (dst_fid == fnum) { + LOG(ERROR) << "Unknown partition id for vertex " + << added_vertices_id[i]; + } else { + 
partitioned_vertices_out[dst_fid].emplace_back( + std::move(added_vertices_id[i])); + partitioned_vdata_out[dst_fid].emplace_back( + std::move(added_vertices_data[i])); + } + } + + local_vertices_id.emplace_back(std::move(partitioned_vertices_out[fid])); + local_vertices_data.emplace_back(std::move(partitioned_vdata_out[fid])); + + std::thread send_thread([&]() { + int dst_worker_id = + (comm_spec_.worker_id() + 1) % comm_spec_.worker_num(); + while (dst_worker_id != comm_spec_.worker_id()) { + for (fid_t fid = 0; fid < fnum; ++fid) { + if (comm_spec_.FragToWorker(fid) != dst_worker_id) { + continue; + } + sync_comm::Send(partitioned_vertices_out[fid], dst_worker_id, + vertex_tag, comm_spec_.comm()); + sync_comm::Send(partitioned_vdata_out[fid], dst_worker_id, vertex_tag, + comm_spec_.comm()); + } + dst_worker_id = (dst_worker_id + 1) % comm_spec_.worker_num(); + } + }); + std::thread recv_thread([&]() { + int src_worker_id = + (comm_spec_.worker_id() + comm_spec_.worker_num() - 1) % + comm_spec_.worker_num(); + while (src_worker_id != comm_spec_.worker_id()) { + for (fid_t fid = 0; fid < fnum; ++fid) { + if (comm_spec_.FragToWorker(fid) != comm_spec_.worker_id()) { + continue; + } + std::vector recv_vertices; + std::vector recv_vdata; + sync_comm::Recv(recv_vertices, src_worker_id, vertex_tag, + comm_spec_.comm()); + sync_comm::Recv(recv_vdata, src_worker_id, vertex_tag, + comm_spec_.comm()); + local_vertices_id.emplace_back(std::move(recv_vertices)); + local_vertices_data.emplace_back(std::move(recv_vdata)); + } + + src_worker_id = (src_worker_id + comm_spec_.worker_num() - 1) % + comm_spec_.worker_num(); + } + }); + + recv_thread.join(); + send_thread.join(); + } + + CommSpec comm_spec_; + LoadGraphSpec spec_; + IdParser id_parser_; + + static constexpr int vertex_tag = 5; + static constexpr int edge_tag = 6; +}; + +} // namespace grape + +#endif // GRAPE_FRAGMENT_BASIC_FRAGMENT_LOADER_BASE_H_ diff --git a/grape/fragment/basic_fragment_mutator.h 
b/grape/fragment/basic_fragment_mutator.h index 8307a460..37fbf24e 100644 --- a/grape/fragment/basic_fragment_mutator.h +++ b/grape/fragment/basic_fragment_mutator.h @@ -22,6 +22,7 @@ limitations under the License. #include #include #include +#include #include namespace grape { @@ -41,7 +42,6 @@ struct Mutation { template class BasicFragmentMutator { using fragment_t = FRAG_T; - using vertex_map_t = typename FRAG_T::vertex_map_t; using oid_t = typename FRAG_T::oid_t; using internal_oid_t = typename InternalOID::type; using vid_t = typename FRAG_T::vid_t; @@ -49,27 +49,18 @@ class BasicFragmentMutator { using edata_t = typename FRAG_T::edata_t; using mutation_t = Mutation; static constexpr LoadStrategy load_strategy = FRAG_T::load_strategy; - using partitioner_t = typename vertex_map_t::partitioner_t; public: explicit BasicFragmentMutator(const CommSpec& comm_spec, std::shared_ptr fragment) : comm_spec_(comm_spec), fragment_(fragment), - vm_ptr_(fragment->GetVertexMap()) { + vm_(fragment->GetVertexMap()) { comm_spec_.Dup(); } ~BasicFragmentMutator() = default; - void SetPartitioner(const partitioner_t& partitioner) { - vm_ptr_->SetPartitioner(partitioner); - } - - void SetPartitioner(partitioner_t&& partitioner) { - vm_ptr_->SetPartitioner(std::move(partitioner)); - } - void AddVerticesToRemove(const std::vector& id_vec) { if (parsed_vertices_to_remove_.empty()) { parsed_vertices_to_remove_ = id_vec; @@ -132,6 +123,7 @@ class BasicFragmentMutator { shuf.Flush(); } recv_thread_.join(); + MPI_Barrier(comm_spec_.comm()); got_vertices_to_add_.emplace_back( std::move(vertices_to_add_[comm_spec_.fid()].buffers())); got_vertices_to_remove_.emplace_back( @@ -150,8 +142,8 @@ class BasicFragmentMutator { foreach_rval(buffers, [this](internal_oid_t&& src, internal_oid_t&& dst, edata_t&& data) { vid_t src_gid, dst_gid; - if (vm_ptr_->_GetGid(src, src_gid) && - vm_ptr_->_GetGid(dst, dst_gid)) { + if (vm_.GetGid(oid_t(src), src_gid) && + vm_.GetGid(oid_t(dst), dst_gid)) { 
mutation_.edges_to_update.emplace_back(src_gid, dst_gid, std::move(data)); } @@ -161,20 +153,21 @@ class BasicFragmentMutator { got_edges_to_update_.clear(); for (auto& buffers : got_edges_to_remove_) { - foreach(buffers, [this](const internal_oid_t& src, - const internal_oid_t& dst) { - vid_t src_gid, dst_gid; - if (vm_ptr_->_GetGid(src, src_gid) && vm_ptr_->_GetGid(dst, dst_gid)) { - mutation_.edges_to_remove.emplace_back(src_gid, dst_gid); - } - }); + foreach(buffers, + [this](const internal_oid_t& src, const internal_oid_t& dst) { + vid_t src_gid, dst_gid; + if (vm_.GetGid(oid_t(src), src_gid) && + vm_.GetGid(oid_t(dst), dst_gid)) { + mutation_.edges_to_remove.emplace_back(src_gid, dst_gid); + } + }); } got_edges_to_remove_.clear(); for (auto& buffers : got_vertices_to_remove_) { foreach(buffers, [this](const internal_oid_t& id) { vid_t gid; - if (vm_ptr_->_GetGid(id, gid)) { + if (vm_.GetGid(oid_t(id), gid)) { parsed_vertices_to_remove_.emplace_back(gid); } }); @@ -185,7 +178,7 @@ class BasicFragmentMutator { for (auto& buffers : got_vertices_to_update_) { foreach_rval(buffers, [this](internal_oid_t&& id, vdata_t&& data) { vid_t gid; - if (vm_ptr_->_GetGid(id, gid)) { + if (vm_.GetGid(oid_t(id), gid)) { parsed_vertices_to_update_.emplace_back(gid, std::move(data)); } }); @@ -193,33 +186,33 @@ class BasicFragmentMutator { } got_vertices_to_update_.clear(); - auto builder = vm_ptr_->GetLocalBuilder(); + std::vector local_vertices_to_add; + for (auto& buffers : got_vertices_to_add_) { - foreach_rval(buffers, - [this, &builder](internal_oid_t&& id, vdata_t&& data) { - vid_t gid; - builder.add_local_vertex(id, gid); - parsed_vertices_to_add_.emplace_back(gid, std::move(data)); - }); + foreach(buffers, [&local_vertices_to_add](const internal_oid_t& id, + const vdata_t& data) { + local_vertices_to_add.emplace_back(oid_t(id)); + }); } - got_vertices_to_add_.clear(); - for (auto& buffers : got_edges_to_add_) { - foreach_helper( - buffers, - [&builder](const 
internal_oid_t& src, const internal_oid_t& dst) { - builder.add_vertex(src); - builder.add_vertex(dst); - }, - make_index_sequence<2>{}); + vm_.ExtendVertices(comm_spec_, std::move(local_vertices_to_add)); + + for (auto& buffers : got_vertices_to_add_) { + foreach_rval(buffers, [this](internal_oid_t&& id, vdata_t&& data) { + vid_t gid; + if (vm_.GetGid(oid_t(id), gid)) { + parsed_vertices_to_add_.emplace_back(gid, std::move(data)); + } + }); } - builder.finish(*vm_ptr_); + got_vertices_to_add_.clear(); for (auto& buffers : got_edges_to_add_) { foreach_rval(buffers, [this](internal_oid_t&& src, internal_oid_t&& dst, edata_t&& data) { vid_t src_gid, dst_gid; - if (vm_ptr_->_GetGid(src, src_gid) && vm_ptr_->_GetGid(dst, dst_gid)) { + if (vm_.GetGid(oid_t(src), src_gid) && + vm_.GetGid(oid_t(dst), dst_gid)) { mutation_.edges_to_add.emplace_back(src_gid, dst_gid, std::move(data)); } @@ -276,8 +269,7 @@ class BasicFragmentMutator { } void AddVertex(const internal_oid_t& id, const vdata_t& data) { - auto& partitioner = vm_ptr_->GetPartitioner(); - fid_t fid = partitioner.GetPartitionId(id); + fid_t fid = vm_.GetFragmentId(oid_t(id)); vertices_to_add_[fid].Emplace(id, data); } @@ -294,9 +286,8 @@ class BasicFragmentMutator { void AddEdge(const internal_oid_t& src, const internal_oid_t& dst, const edata_t& data) { - auto& partitioner = vm_ptr_->GetPartitioner(); - fid_t src_fid = partitioner.GetPartitionId(src); - fid_t dst_fid = partitioner.GetPartitionId(dst); + fid_t src_fid = vm_.GetFragmentId(oid_t(src)); + fid_t dst_fid = vm_.GetFragmentId(oid_t(dst)); edges_to_add_[src_fid].Emplace(src, dst, data); if (src_fid != dst_fid) { edges_to_add_[dst_fid].Emplace(src, dst, data); @@ -318,8 +309,7 @@ class BasicFragmentMutator { } void RemoveVertex(const oid_t& id) { - auto& partitioner = vm_ptr_->GetPartitioner(); - fid_t fid = partitioner.GetPartitionId(id); + fid_t fid = vm_.GetFragmentId(id); vertices_to_remove_[fid].Emplace(id); } @@ -332,9 +322,8 @@ class 
BasicFragmentMutator { } void RemoveEdge(const oid_t& src, const oid_t& dst) { - auto& partitioner = vm_ptr_->GetPartitioner(); - fid_t src_fid = partitioner.GetPartitionId(src); - fid_t dst_fid = partitioner.GetPartitionId(dst); + fid_t src_fid = vm_.GetFragmentId(src); + fid_t dst_fid = vm_.GetFragmentId(dst); edges_to_remove_[src_fid].Emplace(src, dst); if (src_fid != dst_fid) { edges_to_remove_[dst_fid].Emplace(src, dst); @@ -359,8 +348,7 @@ class BasicFragmentMutator { template typename std::enable_if::value>::type UpdateVertex(const oid_t& id, const vdata_t& data) { - auto& partitioner = vm_ptr_->GetPartitioner(); - fid_t fid = partitioner.GetPartitionId(id); + fid_t fid = vm_.GetFragmentId(id); vertices_to_update_[fid].Emplace(id, data); } @@ -384,9 +372,8 @@ class BasicFragmentMutator { } void UpdateEdge(const oid_t& src, const oid_t& dst, const edata_t& data) { - auto& partitioner = vm_ptr_->GetPartitioner(); - fid_t src_fid = partitioner.GetPartitionId(src); - fid_t dst_fid = partitioner.GetPartitionId(dst); + fid_t src_fid = vm_.GetFragmentId(src); + fid_t dst_fid = vm_.GetFragmentId(dst); edges_to_update_[src_fid].Emplace(src, dst, data); if (src_fid != dst_fid) { edges_to_update_[dst_fid].Emplace(src, dst, data); @@ -489,8 +476,8 @@ class BasicFragmentMutator { } CommSpec comm_spec_; + std::shared_ptr fragment_; - std::shared_ptr vm_ptr_; std::thread recv_thread_; @@ -525,6 +512,7 @@ class BasicFragmentMutator { static constexpr int eu_tag = 6; mutation_t mutation_; + VertexMap& vm_; }; } // namespace grape diff --git a/grape/fragment/basic_local_fragment_loader.h b/grape/fragment/basic_local_fragment_loader.h new file mode 100644 index 00000000..d50c8e37 --- /dev/null +++ b/grape/fragment/basic_local_fragment_loader.h @@ -0,0 +1,250 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +#ifndef GRAPE_FRAGMENT_BASIC_LOCAL_FRAGMENT_LOADER_H_ +#define GRAPE_FRAGMENT_BASIC_LOCAL_FRAGMENT_LOADER_H_ + +namespace grape { + +template +class BasicLocalFragmentLoader : public BasicFragmentLoaderBase { + using fragment_t = FRAG_T; + using oid_t = typename fragment_t::oid_t; + using internal_oid_t = typename InternalOID::type; + using vid_t = typename fragment_t::vid_t; + using vdata_t = typename fragment_t::vdata_t; + using edata_t = typename fragment_t::edata_t; + + public: + explicit BasicLocalFragmentLoader(const CommSpec& comm_spec, + const LoadGraphSpec& spec) + : BasicFragmentLoaderBase(comm_spec, spec) { + if (spec_.idxer_type != IdxerType::kLocalIdxer) { + LOG(ERROR) << "Local vertex map is required in BasicLocalFragmentLoader"; + spec_.idxer_type = IdxerType::kLocalIdxer; + } + if (spec_.rebalance) { + LOG(ERROR) << "Rebalance is not supported in BasicLocalFragmentLoader"; + spec_.rebalance = false; + } + if (spec_.partitioner_type != PartitionerType::kHashPartitioner) { + LOG(ERROR) << "Only hash partitioner is supported in " + "BasicLocalFragmentLoader"; + spec_.partitioner_type = PartitionerType::kHashPartitioner; + } + partitioner_ = std::unique_ptr>( + new HashPartitioner(comm_spec_.fnum())); + + vertices_to_frag_.resize(comm_spec_.fnum()); + edges_to_frag_.resize(comm_spec_.fnum()); + for (fid_t fid = 0; fid < comm_spec_.fnum(); ++fid) { + int worker_id = comm_spec_.FragToWorker(fid); + vertices_to_frag_[fid].Init(comm_spec_.comm(), vertex_tag, 4096000); + vertices_to_frag_[fid].SetDestination(worker_id, fid); + 
edges_to_frag_[fid].Init(comm_spec_.comm(), edge_tag, 4096000); + edges_to_frag_[fid].SetDestination(worker_id, fid); + if (worker_id == comm_spec_.worker_id()) { + vertices_to_frag_[fid].DisableComm(); + edges_to_frag_[fid].DisableComm(); + } + } + + vertex_recv_thread_ = + std::thread(&BasicLocalFragmentLoader::vertexRecvRoutine, this); + vertex_recv_thread_running_ = true; + } + + ~BasicLocalFragmentLoader() { + if (vertex_recv_thread_running_) { + for (auto& va : vertices_to_frag_) { + va.Flush(); + } + vertex_recv_thread_.join(); + } + if (edge_recv_thread_running_) { + for (auto& ea : edges_to_frag_) { + ea.Flush(); + } + edge_recv_thread_.join(); + } + } + + void AddVertex(const oid_t& id, const vdata_t& data) override { + internal_oid_t internal_id(id); + fid_t fid = partitioner_->GetPartitionId(internal_id); + if (fid == comm_spec_.fnum()) { + LOG(ERROR) << "Unknown partition id for vertex " << id; + } else { + vertices_to_frag_[fid].Emplace(internal_id, data); + } + } + + void ConstructVertices() override { + for (auto& va : vertices_to_frag_) { + va.Flush(); + } + vertex_recv_thread_.join(); + vertex_recv_thread_running_ = false; + + got_vertices_.emplace_back( + std::move(vertices_to_frag_[comm_spec_.fid()].buffers())); + vertices_to_frag_[comm_spec_.fid()].Clear(); + + edge_recv_thread_ = + std::thread(&BasicLocalFragmentLoader::edgeRecvRoutine, this); + edge_recv_thread_running_ = true; + } + + void AddEdge(const oid_t& src, const oid_t& dst, + const edata_t& data) override { + internal_oid_t internal_src(src); + internal_oid_t internal_dst(dst); + fid_t src_fid = partitioner_->GetPartitionId(internal_src); + fid_t dst_fid = partitioner_->GetPartitionId(internal_dst); + if (src_fid == comm_spec_.fnum() || dst_fid == comm_spec_.fnum()) { + LOG(ERROR) << "Unknown partition id for edge " << src << " -> " << dst; + } else { + edges_to_frag_[src_fid].Emplace(internal_src, internal_dst, data); + if (src_fid != dst_fid) { + 
edges_to_frag_[dst_fid].Emplace(internal_src, internal_dst, data); + } + } + } + + void ConstructFragment(std::shared_ptr& fragment) override { + for (auto& ea : edges_to_frag_) { + ea.Flush(); + } + edge_recv_thread_.join(); + edge_recv_thread_running_ = false; + + MPI_Barrier(comm_spec_.comm()); + got_edges_.emplace_back( + std::move(edges_to_frag_[comm_spec_.fid()].buffers())); + edges_to_frag_[comm_spec_.fid()].Clear(); + + std::unique_ptr> vm_ptr( + new VertexMap()); + { + VertexMapBuilder builder( + comm_spec_.fid(), comm_spec_.fnum(), std::move(partitioner_), + spec_.idxer_type); + for (auto& buffers : got_vertices_) { + foreach_helper( + buffers, + [&builder](const internal_oid_t& id) { builder.add_vertex(id); }, + make_index_sequence<1>{}); + } + for (auto& buffers : got_edges_) { + foreach_helper( + buffers, + [&builder](const internal_oid_t& src, const internal_oid_t& dst) { + builder.add_vertex(src); + builder.add_vertex(dst); + }, + make_index_sequence<2>{}); + } + builder.finish(comm_spec_, *vm_ptr); + } + + std::vector> processed_vertices; + for (auto& buffers : got_vertices_) { + foreach_rval(buffers, [&vm_ptr, &processed_vertices](internal_oid_t&& id, + vdata_t&& data) { + vid_t gid; + CHECK(vm_ptr->GetGid(oid_t(id), gid)); + processed_vertices.emplace_back(gid, std::move(data)); + }); + } + + std::vector> processed_edges; + for (auto& buffers : got_edges_) { + foreach_rval(buffers, [&vm_ptr, &processed_edges](internal_oid_t&& src, + internal_oid_t&& dst, + edata_t&& data) { + vid_t src_gid, dst_gid; + CHECK(vm_ptr->GetGid(oid_t(src), src_gid)); + CHECK(vm_ptr->GetGid(oid_t(dst), dst_gid)); + processed_edges.emplace_back(src_gid, dst_gid, std::move(data)); + }); + } + + fragment = std::shared_ptr(new fragment_t()); + fragment->Init(comm_spec_, spec_.directed, std::move(vm_ptr), + processed_vertices, processed_edges); + + if (!std::is_same::value) { + this->InitOuterVertexData(fragment); + } + } + + private: + void vertexRecvRoutine() { + ShuffleIn 
data_in; + data_in.Init(comm_spec_.fnum(), comm_spec_.comm(), vertex_tag); + fid_t dst_fid; + int src_worker_id; + while (!data_in.Finished()) { + src_worker_id = data_in.Recv(dst_fid); + if (src_worker_id == -1) { + break; + } + got_vertices_.emplace_back(std::move(data_in.buffers())); + data_in.Clear(); + } + } + + void edgeRecvRoutine() { + ShuffleIn data_in; + data_in.Init(comm_spec_.fnum(), comm_spec_.comm(), edge_tag); + fid_t dst_fid; + int src_worker_id; + while (!data_in.Finished()) { + src_worker_id = data_in.Recv(dst_fid); + if (src_worker_id == -1) { + break; + } + CHECK_EQ(dst_fid, comm_spec_.fid()); + got_edges_.emplace_back(std::move(data_in.buffers())); + data_in.Clear(); + } + } + + std::unique_ptr> partitioner_; + + std::vector> vertices_to_frag_; + std::vector> + edges_to_frag_; + + std::thread vertex_recv_thread_; + bool vertex_recv_thread_running_; + std::thread edge_recv_thread_; + bool edge_recv_thread_running_; + + std::vector> got_vertices_; + std::vector> + got_edges_; + + using BasicFragmentLoaderBase::comm_spec_; + using BasicFragmentLoaderBase::spec_; + using BasicFragmentLoaderBase::id_parser_; + + using BasicFragmentLoaderBase::vertex_tag; + using BasicFragmentLoaderBase::edge_tag; +}; + +} // namespace grape + +#endif // GRAPE_FRAGMENT_BASIC_LOCAL_FRAGMENT_LOADER_H_ diff --git a/grape/fragment/basic_rb_fragment_loader.h b/grape/fragment/basic_rb_fragment_loader.h new file mode 100644 index 00000000..92f1b952 --- /dev/null +++ b/grape/fragment/basic_rb_fragment_loader.h @@ -0,0 +1,228 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +#ifndef GRAPE_FRAGMENT_BASIC_RB_FRAGMENT_LOADER_H_ +#define GRAPE_FRAGMENT_BASIC_RB_FRAGMENT_LOADER_H_ + +#include "grape/fragment/basic_fragment_loader_base.h" + +namespace grape { + +template +class BasicRbFragmentLoader : public BasicFragmentLoaderBase { + using fragment_t = FRAG_T; + using oid_t = typename fragment_t::oid_t; + using internal_oid_t = typename InternalOID::type; + using vid_t = typename fragment_t::vid_t; + using vdata_t = typename fragment_t::vdata_t; + using edata_t = typename fragment_t::edata_t; + + public: + explicit BasicRbFragmentLoader(const CommSpec& comm_spec, + const LoadGraphSpec& spec) + : BasicFragmentLoaderBase(comm_spec, spec) { + if (spec_.idxer_type == IdxerType::kLocalIdxer) { + LOG(ERROR) << "Global vertex map is required in BasicRbFragmentLoader"; + spec_.idxer_type = IdxerType::kHashMapIdxer; + } + if (spec_.partitioner_type == PartitionerType::kHashPartitioner) { + LOG(ERROR) + << "Hash partitioner is not supported in BasicRbFragmentLoader"; + spec_.partitioner_type = PartitionerType::kMapPartitioner; + } + } + + ~BasicRbFragmentLoader() {} + + void AddVertex(const oid_t& id, const vdata_t& data) override { + vertices_.emplace_back(id); + vdata_.emplace_back(data); + } + + void ConstructVertices() override { + fid_t fid = comm_spec_.fid(); + fid_t fnum = comm_spec_.fnum(); + std::unique_ptr> partitioner(nullptr); + if (spec_.partitioner_type == PartitionerType::kHashPartitioner) { + partitioner = std::unique_ptr>( + new HashPartitioner(fnum)); + } else if (spec_.partitioner_type == PartitionerType::kMapPartitioner) { + 
std::vector all_vertices; + sync_comm::FlatAllGather(vertices_, all_vertices, comm_spec_.comm()); + DistinctSort(all_vertices); + + partitioner = std::unique_ptr>( + new MapPartitioner(fnum, all_vertices)); + } else if (spec_.partitioner_type == + PartitionerType::kSegmentedPartitioner) { + std::vector all_vertices; + sync_comm::FlatAllGather(vertices_, all_vertices, comm_spec_.comm()); + DistinctSort(all_vertices); + + partitioner = std::unique_ptr>( + new SegmentedPartitioner(fnum, all_vertices)); + } else { + LOG(FATAL) << "Unsupported partitioner type"; + } + + std::vector> local_vertices_id; + this->ShuffleVertex(vertices_, local_vertices_id, *partitioner); + + std::vector sorted_vertices; + for (auto& buf : local_vertices_id) { + sorted_vertices.insert(sorted_vertices.end(), buf.begin(), buf.end()); + } + std::sort(sorted_vertices.begin(), sorted_vertices.end()); + + VertexMapBuilder builder(fid, fnum, std::move(partitioner), + spec_.idxer_type); + for (auto& v : sorted_vertices) { + builder.add_vertex(v); + } + vertex_map_ = + std::unique_ptr>(new VertexMap()); + builder.finish(comm_spec_, *vertex_map_); + } + + void AddEdge(const oid_t& src, const oid_t& dst, + const edata_t& data) override { + edges_src_.emplace_back(src); + edges_dst_.emplace_back(dst); + edges_data_.emplace_back(data); + } + + void ConstructFragment(std::shared_ptr& fragment) override { + if (spec_.rebalance) { + Rebalancer rebalancer(spec_.rebalance_vertex_factor, + std::move(vertex_map_)); + for (auto& v : edges_src_) { + rebalancer.inc_degree(v); + } + if (!spec_.directed) { + for (auto& v : edges_dst_) { + rebalancer.inc_degree(v); + } + } + + vertex_map_ = std::unique_ptr>( + new VertexMap()); + rebalancer.finish(comm_spec_, *vertex_map_); + } + + fid_t fnum = comm_spec_.fnum(); + std::vector> edges_to_frag(fnum); + for (fid_t i = 0; i < fnum; ++i) { + int worker_id = comm_spec_.FragToWorker(i); + edges_to_frag[i].Init(comm_spec_.comm(), edge_tag, 4096000); + 
edges_to_frag[i].SetDestination(worker_id, i); + if (worker_id == comm_spec_.worker_id()) { + edges_to_frag[i].DisableComm(); + } + } + std::vector> got_edges; + std::thread edge_recv_thread([&, this]() { + ShuffleIn data_in; + data_in.Init(comm_spec_.fnum(), comm_spec_.comm(), edge_tag); + fid_t dst_fid; + int src_worker_id; + while (!data_in.Finished()) { + src_worker_id = data_in.Recv(dst_fid); + if (src_worker_id == -1) { + break; + } + if (dst_fid == comm_spec_.fid()) { + got_edges.emplace_back(std::move(data_in.buffers())); + data_in.Clear(); + } + } + }); + + size_t added_edges = edges_src_.size(); + for (size_t i = 0; i < added_edges; ++i) { + vid_t src_gid, dst_gid; + if (vertex_map_->GetGid(edges_src_[i], src_gid) && + vertex_map_->GetGid(edges_dst_[i], dst_gid)) { + fid_t src_fid = id_parser_.get_fragment_id(src_gid); + fid_t dst_fid = id_parser_.get_fragment_id(dst_gid); + edges_to_frag[src_fid].Emplace(src_gid, dst_gid, edges_data_[i]); + if (src_fid != dst_fid) { + edges_to_frag[dst_fid].Emplace(src_gid, dst_gid, edges_data_[i]); + } + } + } + + for (auto& ea : edges_to_frag) { + ea.Flush(); + } + edge_recv_thread.join(); + + MPI_Barrier(comm_spec_.comm()); + got_edges.emplace_back( + std::move(edges_to_frag[comm_spec_.fid()].buffers())); + edges_to_frag[comm_spec_.fid()].Clear(); + + std::vector> processed_edges; + for (auto& buffers : got_edges) { + foreach_rval(buffers, [&processed_edges](vid_t&& src, vid_t&& dst, + edata_t&& data) { + processed_edges.emplace_back(src, dst, std::move(data)); + }); + } + + std::vector> local_vertices_id; + std::vector> local_vertices_data; + this->ShuffleVertexData(vertices_, vdata_, local_vertices_id, + local_vertices_data, vertex_map_->GetPartitioner()); + size_t buf_num = local_vertices_id.size(); + std::vector> processed_vertices; + for (size_t buf_i = 0; buf_i < buf_num; ++buf_i) { + std::vector& local_vertices = local_vertices_id[buf_i]; + std::vector& local_vdata = local_vertices_data[buf_i]; + size_t 
local_vertices_num = local_vertices.size(); + for (size_t i = 0; i < local_vertices_num; ++i) { + vid_t gid; + if (vertex_map_->GetGid(local_vertices[i], gid)) { + processed_vertices.emplace_back(gid, std::move(local_vdata[i])); + } + } + } + + fragment = std::make_shared(); + fragment->Init(comm_spec_, spec_.directed, std::move(vertex_map_), + processed_vertices, processed_edges); + + this->InitOuterVertexData(fragment); + } + + private: + std::vector vertices_; + std::vector vdata_; + + std::vector edges_src_; + std::vector edges_dst_; + std::vector edges_data_; + + std::unique_ptr> vertex_map_; + + using BasicFragmentLoaderBase::comm_spec_; + using BasicFragmentLoaderBase::spec_; + using BasicFragmentLoaderBase::id_parser_; + + using BasicFragmentLoaderBase::edge_tag; +}; + +} // namespace grape + +#endif // GRAPE_FRAGMENT_BASIC_RB_FRAGMENT_LOADER_H_ diff --git a/grape/fragment/csr_edgecut_fragment_base.h b/grape/fragment/csr_edgecut_fragment_base.h index 42091eee..a84ff96d 100644 --- a/grape/fragment/csr_edgecut_fragment_base.h +++ b/grape/fragment/csr_edgecut_fragment_base.h @@ -24,7 +24,6 @@ limitations under the License. #include "grape/graph/adj_list.h" #include "grape/graph/immutable_csr.h" #include "grape/util.h" -#include "grape/vertex_map/global_vertex_map.h" namespace grape { diff --git a/grape/fragment/edgecut_fragment_base.h b/grape/fragment/edgecut_fragment_base.h index 7ad03acd..55521f17 100644 --- a/grape/fragment/edgecut_fragment_base.h +++ b/grape/fragment/edgecut_fragment_base.h @@ -137,7 +137,7 @@ class EdgecutFragmentBase * @return The original ID. */ OID_T GetInnerVertexId(vertex_t v) const { - OID_T oid; + OID_T oid{}; vm_ptr_->GetOid(GetInnerVertexGid(v), oid); return oid; } @@ -150,7 +150,7 @@ class EdgecutFragmentBase * @return The original ID. 
*/ OID_T GetOuterVertexId(vertex_t v) const { - OID_T oid; + OID_T oid{}; vm_ptr_->GetOid(GetOuterVertexGid(v), oid); return oid; } diff --git a/grape/fragment/ev_fragment_loader.h b/grape/fragment/ev_fragment_loader.h index 1b796d64..89bc1d53 100644 --- a/grape/fragment/ev_fragment_loader.h +++ b/grape/fragment/ev_fragment_loader.h @@ -23,8 +23,10 @@ limitations under the License. #include #include +#include "grape/fragment/basic_efile_fragment_loader.h" #include "grape/fragment/basic_fragment_loader.h" -#include "grape/fragment/partitioner.h" +#include "grape/fragment/basic_local_fragment_loader.h" +#include "grape/fragment/basic_rb_fragment_loader.h" #include "grape/io/line_parser_base.h" #include "grape/io/local_io_adaptor.h" #include "grape/io/tsv_line_parser.h" @@ -51,8 +53,6 @@ class EVFragmentLoader { using vdata_t = typename fragment_t::vdata_t; using edata_t = typename fragment_t::edata_t; - using vertex_map_t = typename fragment_t::vertex_map_t; - using partitioner_t = typename vertex_map_t::partitioner_t; using io_adaptor_t = IOADAPTOR_T; using line_parser_t = LINE_PARSER_T; @@ -64,7 +64,7 @@ class EVFragmentLoader { public: explicit EVFragmentLoader(const CommSpec& comm_spec) - : comm_spec_(comm_spec), basic_fragment_loader_(comm_spec) {} + : comm_spec_(comm_spec), basic_fragment_loader_(nullptr) {} ~EVFragmentLoader() = default; @@ -72,10 +72,9 @@ class EVFragmentLoader { const std::string& vfile, const LoadGraphSpec& spec) { std::shared_ptr fragment(nullptr); - CHECK(!spec.rebalance); - if (spec.deserialize && (!spec.serialize)) { - bool deserialized = basic_fragment_loader_.DeserializeFragment( - fragment, spec.deserialization_prefix); + if (spec.deserialize) { + bool deserialized = DeserializeFragment( + fragment, comm_spec_, efile, vfile, spec); int flag = 0; int sum = 0; if (!deserialized) { @@ -93,10 +92,32 @@ class EVFragmentLoader { } } - std::vector id_list; - std::vector vdata_list; + if (vfile.empty()) { + basic_fragment_loader_ = + 
std::unique_ptr>( + new BasicEFileFragmentLoader(comm_spec_, spec)); + } else { + if (spec.idxer_type != IdxerType::kLocalIdxer) { + if (spec.rebalance) { + basic_fragment_loader_ = + std::unique_ptr>( + new BasicRbFragmentLoader(comm_spec_, spec)); + } else { + basic_fragment_loader_ = + std::unique_ptr>( + new BasicFragmentLoader(comm_spec_, spec)); + } + } else { + basic_fragment_loader_ = + std::unique_ptr>( + new BasicLocalFragmentLoader(comm_spec_, spec)); + } + } + if (!vfile.empty()) { auto io_adaptor = std::unique_ptr(new IOADAPTOR_T(vfile)); + io_adaptor->SetPartialRead(comm_spec_.worker_id(), + comm_spec_.worker_num()); io_adaptor->Open(); std::string line; vdata_t v_data; @@ -116,24 +137,12 @@ class EVFragmentLoader { VLOG(1) << e.what(); continue; } - id_list.push_back(vertex_id); - vdata_list.push_back(v_data); + basic_fragment_loader_->AddVertex(vertex_id, v_data); } io_adaptor->Close(); } - partitioner_t partitioner(comm_spec_.fnum(), id_list); - - basic_fragment_loader_.SetPartitioner(std::move(partitioner)); - - basic_fragment_loader_.Start(); - - { - size_t vnum = id_list.size(); - for (size_t i = 0; i < vnum; ++i) { - basic_fragment_loader_.AddVertex(id_list[i], vdata_list[i]); - } - } + basic_fragment_loader_->ConstructVertices(); { auto io_adaptor = @@ -162,7 +171,7 @@ class EVFragmentLoader { continue; } - basic_fragment_loader_.AddEdge(src, dst, e_data); + basic_fragment_loader_->AddEdge(src, dst, e_data); } io_adaptor->Close(); } @@ -170,11 +179,11 @@ class EVFragmentLoader { VLOG(1) << "[worker-" << comm_spec_.worker_id() << "] finished add vertices and edges"; - basic_fragment_loader_.ConstructFragment(fragment, spec.directed); + basic_fragment_loader_->ConstructFragment(fragment); if (spec.serialize) { - bool serialized = basic_fragment_loader_.SerializeFragment( - fragment, spec.serialization_prefix); + bool serialized = SerializeFragment( + fragment, comm_spec_, efile, vfile, spec); if (!serialized) { VLOG(2) << "[worker-" << 
comm_spec_.worker_id() << "] Serialization failed."; @@ -187,7 +196,7 @@ class EVFragmentLoader { private: CommSpec comm_spec_; - BasicFragmentLoader basic_fragment_loader_; + std::unique_ptr> basic_fragment_loader_; line_parser_t line_parser_; }; diff --git a/grape/fragment/ev_fragment_mutator.h b/grape/fragment/ev_fragment_mutator.h index 4b9bce35..ce658782 100644 --- a/grape/fragment/ev_fragment_mutator.h +++ b/grape/fragment/ev_fragment_mutator.h @@ -17,7 +17,6 @@ limitations under the License. #define GRAPE_FRAGMENT_EV_FRAGMENT_MUTATOR_H_ #include -#include #include namespace grape { diff --git a/grape/fragment/ev_fragment_rebalance_loader.h b/grape/fragment/ev_fragment_rebalance_loader.h deleted file mode 100644 index 814ba4f6..00000000 --- a/grape/fragment/ev_fragment_rebalance_loader.h +++ /dev/null @@ -1,432 +0,0 @@ -/** Copyright 2020 Alibaba Group Holding Limited. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -#ifndef GRAPE_FRAGMENT_EV_FRAGMENT_REBALANCE_LOADER_H_ -#define GRAPE_FRAGMENT_EV_FRAGMENT_REBALANCE_LOADER_H_ - -#include - -#include -#include -#include -#include - -#include "grape/fragment/basic_fragment_loader.h" -#include "grape/fragment/partitioner.h" -#include "grape/io/line_parser_base.h" -#include "grape/io/local_io_adaptor.h" -#include "grape/io/tsv_line_parser.h" -#include "grape/worker/comm_spec.h" - -namespace grape { - -/** - * @brief EVFragmentLoader is a loader to load fragments from separated - * efile and vfile. 
- * - * @tparam FRAG_T Fragment type. - * @tparam IOADAPTOR_T IOAdaptor type. - * @tparam LINE_PARSER_T LineParser type. - */ -template > -class EVFragmentRebalanceLoader { - using fragment_t = FRAG_T; - using oid_t = typename fragment_t::oid_t; - using vid_t = typename fragment_t::vid_t; - using vdata_t = typename fragment_t::vdata_t; - using edata_t = typename fragment_t::edata_t; - - using vertex_map_t = typename fragment_t::vertex_map_t; - using partitioner_t = typename vertex_map_t::partitioner_t; - using line_parser_t = LINE_PARSER_T; - - static constexpr LoadStrategy load_strategy = fragment_t::load_strategy; - - static_assert(std::is_base_of, - LINE_PARSER_T>::value, - "LineParser type is invalid"); - - public: - explicit EVFragmentRebalanceLoader(const CommSpec& comm_spec) - : comm_spec_(comm_spec) {} - - ~EVFragmentRebalanceLoader() = default; - - std::shared_ptr LoadFragment(const std::string& efile, - const std::string& vfile, - const LoadGraphSpec& spec) { - std::shared_ptr fragment(nullptr); - if (spec.deserialize && (!spec.serialize)) { - bool deserialized = deserializeFragment(fragment, spec); - int flag = 0; - int sum = 0; - if (!deserialized) { - flag = 1; - } - MPI_Allreduce(&flag, &sum, 1, MPI_INT, MPI_SUM, comm_spec_.comm()); - if (sum != 0) { - fragment.reset(); - if (comm_spec_.worker_id() == 0) { - VLOG(2) << "Deserialization failed, start loading graph from " - "efile and vfile."; - } - } else { - return fragment; - } - } - - std::vector id_list; - std::vector vdata_list; - - CHECK(!vfile.empty()); - { - auto io_adaptor = std::unique_ptr(new IOADAPTOR_T(vfile)); - io_adaptor->Open(); - std::string line; - vdata_t v_data; - oid_t vertex_id; - size_t line_no = 0; - while (io_adaptor->ReadLine(line)) { - ++line_no; - if (line_no % 1000000 == 0) { - VLOG(10) << "[worker-" << comm_spec_.worker_id() << "][vfile] " - << line_no; - } - if (line.empty() || line[0] == '#') - continue; - try { - line_parser_.LineParserForVFile(line, vertex_id, 
v_data); - } catch (std::exception& e) { - VLOG(1) << e.what(); - continue; - } - id_list.push_back(vertex_id); - vdata_list.push_back(v_data); - } - io_adaptor->Close(); - } - - fid_t fnum = comm_spec_.fnum(); - partitioner_t partitioner(fnum, id_list); - - std::shared_ptr vm_ptr = - std::make_shared(comm_spec_); - vm_ptr->SetPartitioner(partitioner); - vm_ptr->Init(); - auto builder = vm_ptr->GetLocalBuilder(); - - for (auto id : id_list) { - builder.add_vertex(id); - } - builder.finish(*vm_ptr); - - std::vector src_list, dst_list; - std::vector edata_list; - { - auto io_adaptor = - std::unique_ptr(new IOADAPTOR_T(std::string(efile))); - io_adaptor->SetPartialRead(comm_spec_.worker_id(), - comm_spec_.worker_num()); - io_adaptor->Open(); - std::string line; - edata_t e_data; - oid_t src, dst; - vid_t src_gid, dst_gid; - - size_t lineNo = 0; - while (io_adaptor->ReadLine(line)) { - ++lineNo; - if (lineNo % 1000000 == 0) { - VLOG(10) << "[worker-" << comm_spec_.worker_id() << "][efile] " - << lineNo; - } - if (line.empty() || line[0] == '#') - continue; - - try { - line_parser_.LineParserForEFile(line, src, dst, e_data); - } catch (std::exception& e) { - VLOG(1) << e.what(); - continue; - } - - CHECK(vm_ptr->GetGid(src, src_gid)); - CHECK(vm_ptr->GetGid(dst, dst_gid)); - - src_list.push_back(src_gid); - dst_list.push_back(dst_gid); - edata_list.push_back(e_data); - } - io_adaptor->Close(); - } - - std::vector> degree_lists(fnum); - std::vector> gid_map(fnum); - for (fid_t i = 0; i < fnum; ++i) { - degree_lists[i].resize(vm_ptr->GetInnerVertexSize(i), 0); - gid_map[i].resize(vm_ptr->GetInnerVertexSize(i)); - } - - for (auto v : src_list) { - fid_t fid = vm_ptr->GetFidFromGid(v); - vid_t lid = vm_ptr->GetLidFromGid(v); - ++degree_lists[fid][lid]; - } - if (!spec.directed) { - for (auto v : dst_list) { - fid_t fid = vm_ptr->GetFidFromGid(v); - vid_t lid = vm_ptr->GetLidFromGid(v); - ++degree_lists[fid][lid]; - } - } - - for (fid_t i = 0; i < fnum; ++i) { - 
CHECK_LT(degree_lists[i].size(), - static_cast(std::numeric_limits::max())); - MPI_Allreduce(MPI_IN_PLACE, degree_lists[i].data(), - degree_lists[i].size(), MPI_INT, MPI_SUM, - comm_spec_.comm()); - } - - size_t total_edge_num = 0; - size_t total_vertex_num = 0; - for (auto& vec : degree_lists) { - total_vertex_num += vec.size(); - for (auto d : vec) { - total_edge_num += d; - } - } - - size_t total_score = - total_edge_num + total_vertex_num * spec.rebalance_vertex_factor; - std::vector scores_before(fnum, 0), scores_after(fnum, 0); - std::vector enum_before(fnum, 0), enum_after(fnum, 0); - - fid_t mapped_fid = 0; - vid_t mapped_lid = 0; - size_t cur_score = 0; - size_t expected_score = (total_score + fnum - 1) / fnum; - vid_t cur_num = 0; - std::vector vnum_list; - for (fid_t i = 0; i < fnum; ++i) { - vid_t vn = degree_lists[i].size(); - for (vid_t j = 0; j < vn; ++j) { - size_t v_score = spec.rebalance_vertex_factor + degree_lists[i][j]; - cur_score += v_score; - scores_before[i] += v_score; - enum_before[i] += degree_lists[i][j]; - scores_after[mapped_fid] += v_score; - enum_after[mapped_fid] += degree_lists[i][j]; - gid_map[i][j] = vm_ptr->Lid2Gid(mapped_fid, mapped_lid); - ++cur_num; - if (cur_score >= expected_score) { - ++mapped_fid; - mapped_lid = 0; - cur_score = 0; - vnum_list.push_back(cur_num); - cur_num = 0; - } else { - ++mapped_lid; - } - } - } - if (mapped_fid == fnum) { - CHECK_EQ(mapped_lid, 0); - } else { - CHECK_EQ(mapped_fid, fnum - 1); - vnum_list.push_back(cur_num); - } - - if (comm_spec_.worker_id() == 0) { - LOG(INFO) << "Total score = " << total_score; - for (fid_t i = 0; i < fnum; ++i) { - LOG(INFO) << "[frag-" << i - << "]: vertex_num: " << degree_lists[i].size() << " -> " - << vnum_list[i] << ", edge_num: " << enum_before[i] << " -> " - << enum_after[i] << ", score: " << scores_before[i] << " ->" - << scores_after[i]; - } - } - - for (auto& v : src_list) { - fid_t fid = vm_ptr->GetFidFromGid(v); - vid_t lid = vm_ptr->GetLidFromGid(v); 
- v = gid_map[fid][lid]; - } - for (auto& v : dst_list) { - fid_t fid = vm_ptr->GetFidFromGid(v); - vid_t lid = vm_ptr->GetLidFromGid(v); - v = gid_map[fid][lid]; - } - - vm_ptr->UpdateToBalance(vnum_list, gid_map); - - std::vector> edges_to_frag(fnum); - for (fid_t i = 0; i < fnum; ++i) { - int worker_id = comm_spec_.FragToWorker(i); - edges_to_frag[i].Init(comm_spec_.comm(), edge_tag, 4096000); - edges_to_frag[i].SetDestination(worker_id, i); - if (comm_spec_.worker_id() == worker_id) { - edges_to_frag[i].DisableComm(); - } - } - - std::vector> processed_vertices; - std::vector> processed_edges; - - std::thread edge_recv_thread([&]() { - ShuffleIn data_in; - data_in.Init(comm_spec_.fnum(), comm_spec_.comm(), edge_tag); - fid_t dst_fid; - int src_worker_id; - while (!data_in.Finished()) { - src_worker_id = data_in.Recv(dst_fid); - if (src_worker_id == -1) { - break; - } - CHECK_EQ(dst_fid, comm_spec_.fid()); - auto& buffers = data_in.buffers(); - foreach_rval(buffers, [&](vid_t&& src, vid_t&& dst, edata_t&& data) { - processed_edges.emplace_back(src, dst, std::move(data)); - }); - data_in.Clear(); - } - }); - - size_t local_enum = src_list.size(); - for (size_t i = 0; i < local_enum; ++i) { - fid_t src_fid = vm_ptr->GetFidFromGid(src_list[i]); - fid_t dst_fid = vm_ptr->GetFidFromGid(dst_list[i]); - edges_to_frag[src_fid].Emplace(src_list[i], dst_list[i], edata_list[i]); - if (src_fid != dst_fid) { - edges_to_frag[dst_fid].Emplace(src_list[i], dst_list[i], edata_list[i]); - } - } - - for (auto& ea : edges_to_frag) { - ea.Flush(); - } - - edge_recv_thread.join(); - { - auto& buffers = edges_to_frag[comm_spec_.fid()].buffers(); - foreach_rval(buffers, [&](vid_t&& src, vid_t&& dst, edata_t&& data) { - processed_edges.emplace_back(src, dst, std::move(data)); - }); - } - - size_t vertex_num = id_list.size(); - if (!std::is_same::value) { - for (size_t i = 0; i < vertex_num; ++i) { - vid_t gid; - CHECK(vm_ptr->GetGid(id_list[i], gid)); - fid_t fid = 
vm_ptr->GetFidFromGid(gid); - if (fid == comm_spec_.fid()) { - processed_vertices.emplace_back(gid, vdata_list[i]); - } - } - } - - fragment = std::shared_ptr(new fragment_t(vm_ptr)); - fragment->Init(comm_spec_.fid(), spec.directed, processed_vertices, - processed_edges); - - if (!std::is_same::value) { - for (size_t i = 0; i < vertex_num; ++i) { - typename fragment_t::vertex_t v; - if (fragment->GetVertex(id_list[i], v)) { - if (fragment->IsOuterVertex(v)) { - fragment->SetData(v, vdata_list[i]); - } - } - } - } - - if (spec.serialize) { - bool serialized = serializeFragment(fragment, vm_ptr, spec); - if (!serialized) { - VLOG(2) << "[worker-" << comm_spec_.worker_id() - << "] Serialization failed."; - } - } - - return fragment; - } - - private: - bool existSerializationFile(const std::string& prefix) { - char vm_fbuf[1024], frag_fbuf[1024]; - snprintf(vm_fbuf, sizeof(vm_fbuf), "%s/%s", prefix.c_str(), - kSerializationVertexMapFilename); - snprintf(frag_fbuf, sizeof(frag_fbuf), kSerializationFilenameFormat, - prefix.c_str(), comm_spec_.fid()); - std::string vm_path = vm_fbuf; - std::string frag_path = frag_fbuf; - return exists_file(vm_path) && exists_file(frag_path); - } - - bool deserializeFragment(std::shared_ptr& fragment, - const LoadGraphSpec& spec) { - std::string type_prefix = fragment_t::type_info(); - CHECK(spec.rebalance); - type_prefix += ("_rb_" + std::to_string(spec.rebalance_vertex_factor)); - std::string typed_prefix = spec.deserialization_prefix + "/" + type_prefix; - LOG(INFO) << "typed_prefix = " << typed_prefix; - if (!existSerializationFile(typed_prefix)) { - return false; - } - auto io_adaptor = - std::unique_ptr(new IOADAPTOR_T(typed_prefix)); - if (io_adaptor->IsExist()) { - std::shared_ptr vm_ptr = - std::make_shared(comm_spec_); - vm_ptr->template Deserialize(typed_prefix, comm_spec_.fid()); - fragment = std::shared_ptr(new fragment_t(vm_ptr)); - fragment->template Deserialize(typed_prefix, - comm_spec_.fid()); - return true; - } else { 
- return false; - } - } - - bool serializeFragment(std::shared_ptr fragment, - std::shared_ptr vm_ptr, - const LoadGraphSpec& spec) { - std::string type_prefix = fragment_t::type_info(); - CHECK(spec.rebalance); - type_prefix += ("_rb_" + std::to_string(spec.rebalance_vertex_factor)); - std::string typed_prefix = spec.serialization_prefix + "/" + type_prefix; - char serial_file[1024]; - snprintf(serial_file, sizeof(serial_file), "%s/%s", typed_prefix.c_str(), - kSerializationVertexMapFilename); - vm_ptr->template Serialize(typed_prefix); - fragment->template Serialize(typed_prefix); - - return true; - } - - static constexpr int edge_tag = 6; - - CommSpec comm_spec_; - line_parser_t line_parser_; -}; - -} // namespace grape - -#endif // GRAPE_FRAGMENT_EV_FRAGMENT_REBALANCE_LOADER_H_ diff --git a/grape/fragment/fragment_base.h b/grape/fragment/fragment_base.h index 8fca3a08..f2fbd3dc 100644 --- a/grape/fragment/fragment_base.h +++ b/grape/fragment/fragment_base.h @@ -22,6 +22,7 @@ limitations under the License. 
#include "grape/graph/adj_list.h" #include "grape/graph/edge.h" #include "grape/graph/vertex.h" +#include "grape/vertex_map/vertex_map.h" #include "grape/worker/comm_spec.h" namespace grape { @@ -51,25 +52,23 @@ template class FragmentBase { public: - using vertex_map_t = typename TRAITS_T::vertex_map_t; - using fragment_adj_list_t = typename TRAITS_T::fragment_adj_list_t; using fragment_const_adj_list_t = typename TRAITS_T::fragment_const_adj_list_t; FragmentBase() : vm_ptr_(nullptr) {} + virtual ~FragmentBase() {} - explicit FragmentBase(std::shared_ptr vm_ptr) - : vm_ptr_(vm_ptr) {} - - std::shared_ptr GetVertexMap() { return vm_ptr_; } - const std::shared_ptr GetVertexMap() const { return vm_ptr_; } + VertexMap& GetVertexMap() { return *vm_ptr_; } + const VertexMap& GetVertexMap() const { return *vm_ptr_; } protected: - void init(fid_t fid, bool directed) { + void init(fid_t fid, bool directed, + std::unique_ptr>&& vm_ptr) { fid_ = fid; directed_ = directed; - fnum_ = vm_ptr_->GetFragmentNum(); + fnum_ = vm_ptr->GetFragmentNum(); + vm_ptr_ = std::move(vm_ptr); id_parser_.init(fnum_); ivnum_ = vm_ptr_->GetInnerVertexSize(fid); } @@ -82,7 +81,8 @@ class FragmentBase { * @param vertices A set of vertices. * @param edges A set of edges. */ - virtual void Init(fid_t fid, bool directed, + virtual void Init(const CommSpec& comm_spec, bool directed, + std::unique_ptr>&& vm_ptr, std::vector>& vertices, std::vector>& edges) = 0; @@ -170,7 +170,7 @@ class FragmentBase { * @return Its original ID. 
*/ OID_T GetId(const Vertex& v) const { - OID_T oid; + OID_T oid{}; vm_ptr_->GetOid(Vertex2Gid(v), oid); return oid; } @@ -326,7 +326,7 @@ class FragmentBase { VID_T ivnum_; vertices_t vertices_; - std::shared_ptr vm_ptr_; + std::unique_ptr> vm_ptr_; IdParser id_parser_; }; diff --git a/grape/fragment/immutable_edgecut_fragment.h b/grape/fragment/immutable_edgecut_fragment.h index f955abb3..aecbc9d5 100644 --- a/grape/fragment/immutable_edgecut_fragment.h +++ b/grape/fragment/immutable_edgecut_fragment.h @@ -42,15 +42,13 @@ limitations under the License. #include "grape/types.h" #include "grape/util.h" #include "grape/utils/vertex_array.h" -#include "grape/vertex_map/global_vertex_map.h" #include "grape/worker/comm_spec.h" namespace grape { class CommSpec; class OutArchive; -template +template struct ImmutableEdgecutFragmentTraits { using inner_vertices_t = VertexRange; using outer_vertices_t = VertexRange; @@ -63,7 +61,6 @@ struct ImmutableEdgecutFragmentTraits { using csr_t = ImmutableCSR>; using csr_builder_t = ImmutableCSRBuild>; using mirror_vertices_t = std::vector>; - using vertex_map_t = VERTEX_MAP_T; }; /** @@ -114,16 +111,14 @@ struct ImmutableEdgecutFragmentTraits { * */ template > + LoadStrategy _load_strategy = LoadStrategy::kOnlyOut> class ImmutableEdgecutFragment : public CSREdgecutFragmentBase< OID_T, VID_T, VDATA_T, EDATA_T, - ImmutableEdgecutFragmentTraits> { + ImmutableEdgecutFragmentTraits> { public: - using traits_t = ImmutableEdgecutFragmentTraits; + using traits_t = + ImmutableEdgecutFragmentTraits; using base_t = CSREdgecutFragmentBase; using internal_vertex_t = internal::Vertex; @@ -134,7 +129,6 @@ class ImmutableEdgecutFragment using oid_t = OID_T; using vdata_t = VDATA_T; using edata_t = EDATA_T; - using vertex_map_t = typename traits_t::vertex_map_t; using IsEdgeCut = std::true_type; using IsVertexCut = std::false_type; @@ -155,10 +149,8 @@ class ImmutableEdgecutFragment template using vertex_array_t = VertexArray; - 
ImmutableEdgecutFragment() {} - - explicit ImmutableEdgecutFragment(std::shared_ptr vm_ptr) - : FragmentBase(vm_ptr) {} + ImmutableEdgecutFragment() + : FragmentBase() {} virtual ~ImmutableEdgecutFragment() = default; @@ -167,41 +159,65 @@ class ImmutableEdgecutFragment using base_t::IsInnerVertexGid; static std::string type_info() { - std::string ret = ""; + std::string ret = "ImmutableEdgecutFragment<"; + if (std::is_same::value) { + ret += "int64_t, "; + } else if (std::is_same::value) { + ret += "int32_t, "; + } else if (std::is_same::value) { + ret += "std::string, "; + } else { + LOG(FATAL) << "OID_T type not supported..."; + } + + if (std::is_same::value) { + ret += "uint64_t, "; + } else if (std::is_same::value) { + ret += "uint32_t, "; + } else { + LOG(FATAL) << "VID_T type not supported..."; + } + + if (std::is_same::value) { + ret += "empty, "; + } else if (std::is_same::value) { + ret += "double, "; + } else if (std::is_same::value) { + ret += "float, "; + } else { + LOG(FATAL) << "Vertex data type not supported..."; + } + if (std::is_same::value) { - ret += "empty"; + ret += "empty, "; } else if (std::is_same::value) { - ret += "double"; + ret += "double, "; } else if (std::is_same::value) { - ret += "float"; + ret += "float, "; } else { LOG(FATAL) << "Edge data type not supported..."; } if (_load_strategy == LoadStrategy::kOnlyOut) { - ret += "_out"; + ret += "out"; } else if (_load_strategy == LoadStrategy::kOnlyIn) { - ret += "_in"; + ret += "in"; } else if (_load_strategy == LoadStrategy::kBothOutIn) { - ret += "_both"; + ret += "both"; } else { LOG(FATAL) << "Invalid load strategy..."; } - using partitioner_t = typename VERTEX_MAP_T::partitioner_t; - if (std::is_same>::value) { - ret += "_hash"; - } else if (std::is_same>::value) { - ret += "_seg"; - } + ret += ">"; return ret; } - void Init(fid_t fid, bool directed, std::vector& vertices, + void Init(const CommSpec& comm_spec, bool directed, + std::unique_ptr>&& vm_ptr, + std::vector& vertices, 
std::vector& edges) override { - init(fid, directed); + init(comm_spec.fid(), directed, std::move(vm_ptr)); static constexpr VID_T invalid_vid = std::numeric_limits::max(); { @@ -365,10 +381,13 @@ class ImmutableEdgecutFragment } template - void Deserialize(const std::string& prefix, const fid_t fid) { + void Deserialize(const CommSpec& comm_spec, + std::unique_ptr>&& vm_ptr, + const std::string& prefix) { + vm_ptr_ = std::move(vm_ptr); char fbuf[1024]; snprintf(fbuf, sizeof(fbuf), kSerializationFilenameFormat, prefix.c_str(), - fid); + comm_spec.fid()); auto io_adaptor = std::unique_ptr(new IOADAPTOR_T(std::string(fbuf))); io_adaptor->Open(); @@ -722,6 +741,7 @@ class ImmutableEdgecutFragment using base_t::fid_; using base_t::fnum_; using base_t::id_parser_; + using base_t::vm_ptr_; ska::flat_hash_map, std::equal_to, Allocator>> diff --git a/grape/fragment/loader.h b/grape/fragment/loader.h index 167e9a0a..d2abe7af 100644 --- a/grape/fragment/loader.h +++ b/grape/fragment/loader.h @@ -21,8 +21,6 @@ limitations under the License. 
#include "grape/fragment/ev_fragment_loader.h" #include "grape/fragment/ev_fragment_mutator.h" -#include "grape/fragment/ev_fragment_rebalance_loader.h" -#include "grape/fragment/partitioner.h" #include "grape/io/local_io_adaptor.h" namespace grape { @@ -48,19 +46,9 @@ static std::shared_ptr LoadGraph( const std::string& efile, const std::string& vfile, const CommSpec& comm_spec, const LoadGraphSpec& spec = DefaultLoadGraphSpec()) { - if (spec.rebalance) { - std::unique_ptr< - EVFragmentRebalanceLoader> - loader( - new EVFragmentRebalanceLoader( - comm_spec)); - return loader->LoadFragment(efile, vfile, spec); - } else { - std::unique_ptr> - loader(new EVFragmentLoader( - comm_spec)); - return loader->LoadFragment(efile, vfile, spec); - } + std::unique_ptr> loader( + new EVFragmentLoader(comm_spec)); + return loader->LoadFragment(efile, vfile, spec); } template +template struct MutableEdgecutFragmentTraits { using inner_vertices_t = VertexRange; using outer_vertices_t = VertexRange; @@ -46,22 +44,17 @@ struct MutableEdgecutFragmentTraits { using csr_t = DeMutableCSR>; using csr_builder_t = DeMutableCSRBuilder>; - using vertex_map_t = VERTEX_MAP_T; using mirror_vertices_t = std::vector>; }; template >> + LoadStrategy _load_strategy = LoadStrategy::kOnlyOut> class MutableEdgecutFragment : public CSREdgecutFragmentBase< OID_T, VID_T, VDATA_T, EDATA_T, - MutableEdgecutFragmentTraits> { + MutableEdgecutFragmentTraits> { public: - using traits_t = MutableEdgecutFragmentTraits; + using traits_t = MutableEdgecutFragmentTraits; using base_t = CSREdgecutFragmentBase; using internal_vertex_t = internal::Vertex; @@ -73,8 +66,6 @@ class MutableEdgecutFragment using vdata_t = VDATA_T; using edata_t = EDATA_T; - using vertex_map_t = typename traits_t::vertex_map_t; - using IsEdgeCut = std::true_type; using IsVertexCut = std::false_type; @@ -96,19 +87,73 @@ class MutableEdgecutFragment template using vertex_array_t = VertexArray; - explicit MutableEdgecutFragment(std::shared_ptr 
vm_ptr) - : FragmentBase(vm_ptr) {} + MutableEdgecutFragment() + : FragmentBase() {} virtual ~MutableEdgecutFragment() = default; using base_t::buildCSR; using base_t::init; using base_t::IsInnerVertexGid; - static std::string type_info() { return ""; } + static std::string type_info() { + std::string ret = "MutableEdgecutFragment<"; + if (std::is_same::value) { + ret += "int64_t, "; + } else if (std::is_same::value) { + ret += "int32_t, "; + } else if (std::is_same::value) { + ret += "std::string, "; + } else { + LOG(FATAL) << "OID_T type not supported..."; + } + + if (std::is_same::value) { + ret += "uint64_t, "; + } else if (std::is_same::value) { + ret += "uint32_t, "; + } else { + LOG(FATAL) << "VID_T type not supported..."; + } + + if (std::is_same::value) { + ret += "empty, "; + } else if (std::is_same::value) { + ret += "double, "; + } else if (std::is_same::value) { + ret += "float, "; + } else { + LOG(FATAL) << "Vertex data type not supported..."; + } + + if (std::is_same::value) { + ret += "empty, "; + } else if (std::is_same::value) { + ret += "double, "; + } else if (std::is_same::value) { + ret += "float, "; + } else { + LOG(FATAL) << "Edge data type not supported..."; + } + + if (_load_strategy == LoadStrategy::kOnlyOut) { + ret += "out"; + } else if (_load_strategy == LoadStrategy::kOnlyIn) { + ret += "in"; + } else if (_load_strategy == LoadStrategy::kBothOutIn) { + ret += "both"; + } else { + LOG(FATAL) << "Invalid load strategy..."; + } + + ret += ">"; + return ret; + } - void Init(fid_t fid, bool directed, std::vector& vertices, + void Init(const CommSpec& comm_spec, bool directed, + std::unique_ptr>&& vm_ptr, + std::vector& vertices, std::vector& edges) override { - init(fid, directed); + init(comm_spec.fid(), directed, std::move(vm_ptr)); ovnum_ = 0; static constexpr VID_T invalid_vid = std::numeric_limits::max(); @@ -387,10 +432,13 @@ class MutableEdgecutFragment } template - void Deserialize(const std::string& prefix, const fid_t fid) { + 
void Deserialize(const CommSpec& comm_spec, + std::unique_ptr>&& vm_ptr, + const std::string& prefix) { + vm_ptr_ = std::move(vm_ptr); char fbuf[1024]; snprintf(fbuf, sizeof(fbuf), kSerializationFilenameFormat, prefix.c_str(), - fid); + comm_spec.fid()); auto io_adaptor = std::unique_ptr(new IOADAPTOR_T(std::string(fbuf))); io_adaptor->Open(); diff --git a/grape/fragment/rebalancer.h b/grape/fragment/rebalancer.h new file mode 100644 index 00000000..103cfc65 --- /dev/null +++ b/grape/fragment/rebalancer.h @@ -0,0 +1,170 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +#ifndef GRAPE_FRAGMENT_REBALANCER_H_ +#define GRAPE_FRAGMENT_REBALANCER_H_ + +#include + +#include "grape/types.h" +#include "grape/vertex_map/vertex_map.h" + +namespace grape { + +template +class Rebalancer { + using internal_oid_t = typename InternalOID::type; + using vid_t = VID_T; + + public: + Rebalancer(int vertex_factor, + std::unique_ptr>&& vertex_map) + : vertex_factor_(vertex_factor), vertex_map_(std::move(vertex_map)) { + fid_t fnum = vertex_map_->GetFragmentNum(); + id_parser_.init(fnum); + degree_.resize(fnum); + for (fid_t i = 0; i < fnum; ++i) { + degree_[i].resize(vertex_map_->GetInnerVertexSize(i), 0); + } + } + + void inc_degree(const OID_T& oid) { + VID_T gid; + if (vertex_map_->GetGid(oid, gid)) { + fid_t fid = id_parser_.get_fragment_id(gid); + vid_t lid = id_parser_.get_local_id(gid); + ++degree_[fid][lid]; + } + } + + void finish(const CommSpec& comm_spec, + VertexMap& new_vertex_map) { + fid_t fnum = vertex_map_->GetFragmentNum(); + fid_t self_fid = comm_spec.fid(); + for (auto& deg : degree_) { + MPI_Allreduce(MPI_IN_PLACE, deg.data(), deg.size(), MPI_INT, MPI_SUM, + comm_spec.comm()); + } + size_t total_score = 0; + std::vector frag_scores_before, frag_scores_after; + for (auto& vec : degree_) { + size_t cur_score = vec.size() * vertex_factor_; + for (auto deg : vec) { + cur_score += deg; + } + + frag_scores_before.push_back(cur_score); + total_score += cur_score; + } + size_t expected_score = (total_score + fnum - 1) / fnum; + std::vector native_oids; + std::unique_ptr> new_partitioner(nullptr); + if (vertex_map_->GetPartitioner().type() == + PartitionerType::kMapPartitioner) { + fid_t cur_fid = 0; + size_t cur_score = 0; + + new_partitioner = std::unique_ptr>( + new MapPartitioner(fnum)); + frag_scores_after.resize(fnum, 0); + for (fid_t i = 0; i < fnum; ++i) { + vid_t vnum = vertex_map_->GetInnerVertexSize(i); + for (vid_t j = 0; j < vnum; ++j) { + OID_T cur_oid; + CHECK(vertex_map_->GetOid(i, j, cur_oid)); + 
new_partitioner->SetPartitionId(internal_oid_t(cur_oid), cur_fid); + if (cur_fid == self_fid) { + native_oids.push_back(cur_oid); + } + + size_t v_score = degree_[i][j] + vertex_factor_; + frag_scores_after[cur_fid] += v_score; + + cur_score += v_score; + if (cur_score > expected_score && cur_fid < (fnum - 1)) { + ++cur_fid; + cur_score = 0; + } + } + } + CHECK_LE(cur_fid, fnum); + } else if (vertex_map_->GetPartitioner().type() == + PartitionerType::kSegmentedPartitioner) { + size_t cur_score = 0; + fid_t cur_fid = 0; + bool is_boundary = false; + std::vector boundaries; + frag_scores_after.resize(fnum, 0); + for (fid_t i = 0; i < fnum; ++i) { + std::vector> frag_vertices; + vid_t vnum = vertex_map_->GetInnerVertexSize(i); + frag_vertices.reserve(vnum); + for (vid_t j = 0; j < vnum; ++j) { + OID_T cur_oid; + CHECK(vertex_map_->GetOid(i, j, cur_oid)); + frag_vertices.emplace_back(cur_oid, j); + } + std::sort( + frag_vertices.begin(), frag_vertices.end(), + [](const std::pair& a, + const std::pair& b) { return a.first < b.first; }); + + for (auto& pair : frag_vertices) { + if (is_boundary) { + boundaries.push_back(pair.first); + is_boundary = false; + } + if (cur_fid == self_fid) { + native_oids.push_back(pair.first); + } + frag_scores_after[cur_fid] += + (degree_[i][pair.second] + vertex_factor_); + cur_score += (degree_[i][pair.second] + vertex_factor_); + if (cur_score >= expected_score && cur_fid < (fnum - 1)) { + is_boundary = true; + ++cur_fid; + cur_score = 0; + } + } + } + CHECK_EQ(boundaries.size(), fnum - 1); + new_partitioner = std::unique_ptr>( + new SegmentedPartitioner(boundaries)); + } else { + LOG(FATAL) << "Unsupported partitioner type - " + << static_cast(vertex_map_->GetPartitioner().type()); + } + IdxerType idxer_type = vertex_map_->idxer_type(); + CHECK(idxer_type != IdxerType::kLocalIdxer) + << "Rebalancer only supports global vertex map"; + VertexMapBuilder builder( + self_fid, fnum, std::move(new_partitioner), idxer_type); + for (auto& oid : 
native_oids) { + builder.add_vertex(oid); + } + builder.finish(comm_spec, new_vertex_map); + } + + private: + int vertex_factor_; + std::unique_ptr> vertex_map_; + IdParser id_parser_; + + std::vector> degree_; +}; + +} // namespace grape + +#endif // GRAPE_FRAGMENT_REBALANCER_H_ diff --git a/grape/graph/id_indexer.h b/grape/graph/id_indexer.h index 0be9e7f8..923e8fdf 100644 --- a/grape/graph/id_indexer.h +++ b/grape/graph/id_indexer.h @@ -21,8 +21,11 @@ limitations under the License. #include #include "flat_hash_map/flat_hash_map.hpp" +#include "grape/communication/sync_comm.h" #include "grape/config.h" +#include "grape/io/io_adaptor_base.h" #include "grape/types.h" +#include "grape/utils/ref_vector.h" #include "grape/utils/string_view_vector.h" namespace grape { @@ -32,118 +35,249 @@ namespace id_indexer_impl { static constexpr int8_t min_lookups = 4; static constexpr double max_load_factor = 0.5f; -inline int8_t log2(size_t value) { - static constexpr int8_t table[64] = { - 63, 0, 58, 1, 59, 47, 53, 2, 60, 39, 48, 27, 54, 33, 42, 3, - 61, 51, 37, 40, 49, 18, 28, 20, 55, 30, 34, 11, 43, 14, 22, 4, - 62, 57, 46, 52, 38, 26, 32, 41, 50, 36, 17, 19, 29, 10, 13, 21, - 56, 45, 25, 31, 35, 16, 9, 12, 44, 24, 15, 8, 23, 7, 6, 5}; - value |= value >> 1; - value |= value >> 2; - value |= value >> 4; - value |= value >> 8; - value |= value >> 16; - value |= value >> 32; - return table[((value - (value >> 1)) * 0x07EDD5E59A4E28C2) >> 58]; +template +size_t vec_dump_bytes(T const& vec) { + return vec.size() * sizeof(vec.front()) + sizeof(typename T::size_type); } template struct KeyBuffer { - using type = std::vector>; + public: + KeyBuffer() = default; + ~KeyBuffer() = default; + + const T& get(size_t idx) const { return inner_[idx]; } + void set(size_t idx, const T& val) { inner_[idx] = val; } + + void push_back(const T& val) { inner_.push_back(val); } + + size_t size() const { return inner_.size(); } + + std::vector>& buffer() { return inner_; } + const std::vector>& 
buffer() const { return inner_; } template - static void serialize(std::unique_ptr& writer, type& buffer) { - size_t size = buffer.size(); + void serialize(std::unique_ptr& writer) const { + size_t size = inner_.size(); CHECK(writer->Write(&size, sizeof(size_t))); if (size > 0) { - CHECK(writer->Write(buffer.data(), size * sizeof(T))); + CHECK(writer->Write(const_cast(inner_.data()), size * sizeof(T))); } } + void serialize_to_mem(std::vector& buf) const { + encode_vec(inner_, buf); + } + template - static void deserialize(std::unique_ptr& reader, type& buffer) { + void deserialize(std::unique_ptr& reader) { size_t size; CHECK(reader->Read(&size, sizeof(size_t))); if (size > 0) { - buffer.resize(size); - CHECK(reader->Read(buffer.data(), size * sizeof(T))); + inner_.resize(size); + CHECK(reader->Read(inner_.data(), size * sizeof(T))); } } - static void SendTo(const type& buffer, int dst_worker_id, int tag, - MPI_Comm comm) { - sync_comm::Send(buffer, dst_worker_id, tag, comm); + void swap(KeyBuffer& rhs) { inner_.swap(rhs.inner_); } + + void clear() { inner_.clear(); } + + template + void load(Loader& loader) { + loader.load_vec(inner_); } - static void RecvFrom(type& buffer, int src_worker_id, int tag, - MPI_Comm comm) { - sync_comm::Recv(buffer, src_worker_id, tag, comm); + template + void dump(Dumper& dumper) const { + dumper.dump_vec(inner_); } + + size_t dump_size() const { return vec_dump_bytes(inner_); } + + private: + std::vector> inner_; }; template <> struct KeyBuffer { - using type = StringViewVector; + KeyBuffer() = default; + ~KeyBuffer() = default; + + nonstd::string_view get(size_t idx) const { return inner_[idx]; } + + void push_back(const nonstd::string_view& val) { inner_.push_back(val); } + + size_t size() const { return inner_.size(); } + + StringViewVector& buffer() { return inner_; } + const StringViewVector& buffer() const { return inner_; } template - static void serialize(std::unique_ptr& writer, type& buffer) { - size_t content_buffer_size 
= buffer.content_buffer().size(); - CHECK(writer->Write(&content_buffer_size, sizeof(size_t))); - if (content_buffer_size > 0) { - CHECK(writer->Write(buffer.content_buffer().data(), - content_buffer_size * sizeof(char))); - } - size_t offset_buffer_size = buffer.offset_buffer().size(); - CHECK(writer->Write(&offset_buffer_size, sizeof(size_t))); - if (offset_buffer_size > 0) { - CHECK(writer->Write(buffer.offset_buffer().data(), - offset_buffer_size * sizeof(size_t))); - } + void serialize(std::unique_ptr& writer) const { + inner_.serialize(writer); + } + + void serialize_to_mem(std::vector& buf) const { + inner_.serialize_to_mem(buf); } template - static void deserialize(std::unique_ptr& reader, type& buffer) { - size_t content_buffer_size; - CHECK(reader->Read(&content_buffer_size, sizeof(size_t))); - if (content_buffer_size > 0) { - buffer.content_buffer().resize(content_buffer_size); - CHECK(reader->Read(buffer.content_buffer().data(), - content_buffer_size * sizeof(char))); - } - size_t offset_buffer_size; - CHECK(reader->Read(&offset_buffer_size, sizeof(size_t))); - if (offset_buffer_size > 0) { - buffer.offset_buffer().resize(offset_buffer_size); - CHECK(reader->Read(buffer.offset_buffer().data(), - offset_buffer_size * sizeof(size_t))); - } + void deserialize(std::unique_ptr& reader) { + inner_.deserialize(reader); + } + + void swap(KeyBuffer& rhs) { inner_.swap(rhs.inner_); } + + void clear() { inner_.clear(); } + + template + void load(Loader& loader) { + loader.load_vec(inner_.content_buffer()); + loader.load_vec(inner_.offset_buffer()); } - static void SendTo(const type& buffer, int dst_worker_id, int tag, - MPI_Comm comm) { - sync_comm::Send(buffer, dst_worker_id, tag, comm); + template + void dump(Dumper& dumper) const { + dumper.dump_vec(inner_.content_buffer()); + dumper.dump_vec(inner_.offset_buffer()); } - static void RecvFrom(type& buffer, int src_worker_id, int tag, - MPI_Comm comm) { - sync_comm::Recv(buffer, src_worker_id, tag, comm); + 
size_t dump_size() const { + return vec_dump_bytes(inner_.content_buffer()) + + vec_dump_bytes(inner_.offset_buffer()); } + + private: + StringViewVector inner_; +}; + +#if __cplusplus >= 201703L +template <> +struct KeyBuffer { + KeyBuffer() = default; + ~KeyBuffer() = default; + + std::string_view get(size_t idx) const { + std::string_view view(inner_[idx].data(), inner_[idx].size()); + return view; + } + + void push_back(const std::string_view& val) { + nonstd::string_view view(val.data(), val.size()); + inner_.push_back(view); + } + + size_t size() const { return inner_.size(); } + + StringViewVector& buffer() { return inner_; } + const StringViewVector& buffer() const { return inner_; } + + void swap(KeyBuffer& rhs) { inner_.swap(rhs.inner_); } + + void clear() { inner_.clear(); } + + template + void load(Loader& loader) { + loader.load_vec(inner_.content_buffer()); + loader.load_vec(inner_.offset_buffer()); + } + + template + void dump(Dumper& dumper) const { + dumper.dump_vec(inner_.content_buffer()); + dumper.dump_vec(inner_.offset_buffer()); + } + + size_t dump_size() { + return vec_dump_bytes(inner_.content_buffer()) + + vec_dump_bytes(inner_.offset_buffer()); + } + + private: + StringViewVector inner_; +}; +#endif + +template +struct KeyBufferView { + public: + KeyBufferView() {} + + size_t init(const void* buffer, size_t size) { + return inner_.init(buffer, size); + } + + T get(size_t idx) const { return inner_.get(idx); } + + size_t size() const { return inner_.size(); } + + template + void load(Loader& loader) { + inner_.load(loader); + } + + private: + ref_vector inner_; }; } // namespace id_indexer_impl +namespace sync_comm { + +template +struct CommImpl> { + static void send(const id_indexer_impl::KeyBuffer& buf, int dst_worker_id, + int tag, MPI_Comm comm) { + Send(buf.buffer(), dst_worker_id, tag, comm); + } + + static void recv(id_indexer_impl::KeyBuffer& buf, int src_worker_id, + int tag, MPI_Comm comm) { + Recv(buf.buffer(), src_worker_id, 
tag, comm); + } +}; + +template <> +struct CommImpl> { + static void send(const id_indexer_impl::KeyBuffer& buf, + int dst_worker_id, int tag, MPI_Comm comm) { + Send(buf.buffer(), dst_worker_id, tag, comm); + } + + static void recv(id_indexer_impl::KeyBuffer& buf, + int src_worker_id, int tag, MPI_Comm comm) { + Recv(buf.buffer(), src_worker_id, tag, comm); + } +}; + +} // namespace sync_comm + template class IdIndexer { public: - using key_buffer_t = typename id_indexer_impl::KeyBuffer::type; + using key_buffer_t = typename id_indexer_impl::KeyBuffer; using ind_buffer_t = std::vector>; using dist_buffer_t = std::vector>; IdIndexer() : hasher_() { reset_to_empty_state(); } - ~IdIndexer() {} + IdIndexer(IdIndexer&& rhs) { swap(rhs); } + ~IdIndexer() = default; + + IdIndexer& operator=(IdIndexer&& rhs) { + swap(rhs); + return *this; + } size_t entry_num() const { return distances_.size(); } + size_t memory_usage() const { + size_t ret = keys_.dump_size(); + ret += indices_.size() * sizeof(INDEX_T); + ret += distances_.size() * sizeof(int8_t); + return ret; + } + bool add(const KEY_T& oid, INDEX_T& lid) { size_t index = hash_policy_.index_for_hash(hasher_(oid), num_slots_minus_one_); @@ -152,7 +286,7 @@ class IdIndexer { for (; distances_[index] >= distance_from_desired; ++index, ++distance_from_desired) { INDEX_T cur_lid = indices_[index]; - if (keys_[cur_lid] == oid) { + if (keys_.get(cur_lid) == oid) { lid = cur_lid; return false; } @@ -174,7 +308,7 @@ class IdIndexer { for (; distances_[index] >= distance_from_desired; ++index, ++distance_from_desired) { INDEX_T cur_lid = indices_[index]; - if (keys_[cur_lid] == oid) { + if (keys_.get(cur_lid) == oid) { lid = cur_lid; return false; } @@ -196,7 +330,7 @@ class IdIndexer { for (; distances_[index] >= distance_from_desired; ++index, ++distance_from_desired) { INDEX_T cur_lid = indices_[index]; - if (keys_[cur_lid] == oid) { + if (keys_.get(cur_lid) == oid) { lid = cur_lid; return false; } @@ -218,7 +352,7 @@ class 
IdIndexer { for (; distances_[index] >= distance_from_desired; ++index, ++distance_from_desired) { INDEX_T cur_lid = indices_[index]; - if (keys_[cur_lid] == oid) { + if (keys_.get(cur_lid) == oid) { lid = cur_lid; return false; } @@ -239,7 +373,7 @@ class IdIndexer { int8_t distance_from_desired = 0; for (; distances_[index] >= distance_from_desired; ++index, ++distance_from_desired) { - if (keys_[indices_[index]] == oid) { + if (keys_.get(indices_[index]) == oid) { return; } } @@ -258,7 +392,7 @@ class IdIndexer { int8_t distance_from_desired = 0; for (; distances_[index] >= distance_from_desired; ++index, ++distance_from_desired) { - if (keys_[indices_[index]] == oid) { + if (keys_.get(indices_[index]) == oid) { return; } } @@ -282,7 +416,7 @@ class IdIndexer { if (lid >= num_elements_) { return false; } - oid = keys_[lid]; + oid = keys_.get(lid); return true; } @@ -292,7 +426,7 @@ class IdIndexer { for (int8_t distance = 0; distances_[index] >= distance; ++distance, ++index) { INDEX_T ret = indices_[index]; - if (keys_[ret] == oid) { + if (keys_.get(ret) == oid) { lid = ret; return true; } @@ -305,7 +439,7 @@ class IdIndexer { for (int8_t distance = 0; distances_[index] >= distance; ++distance, ++index) { INDEX_T ret = indices_[index]; - if (keys_[ret] == oid) { + if (keys_.get(ret) == oid) { lid = ret; return true; } @@ -332,49 +466,73 @@ class IdIndexer { template void Serialize(std::unique_ptr& writer) { - id_indexer_impl::KeyBuffer::serialize(writer, keys_); - InArchive arc; - arc << hash_policy_.get_mod_function_index() << max_lookups_ - << num_elements_ << num_slots_minus_one_ << indices_.size() - << distances_.size(); - CHECK(writer->WriteArchive(arc)); - arc.Clear(); - + keys_.serialize(writer); + + size_t mod_function_index = hash_policy_.get_mod_function_index(); + int8_t max_lookups_val = max_lookups_; + size_t num_elements_val = num_elements_; + size_t num_slots_minus_one_val = num_slots_minus_one_; + CHECK(writer->Write(&mod_function_index, 
sizeof(size_t))); + CHECK(writer->Write(&max_lookups_val, sizeof(int8_t))); + CHECK(writer->Write(&num_elements_val, sizeof(size_t))); + CHECK(writer->Write(&num_slots_minus_one_val, sizeof(size_t))); + + size_t indices_size = indices_.size(); + CHECK(writer->Write(&indices_size, sizeof(size_t))); if (indices_.size() > 0) { - CHECK(writer->Write(indices_.data(), indices_.size() * sizeof(INDEX_T))); + CHECK(writer->Write(const_cast(indices_.data()), + indices_size * sizeof(INDEX_T))); } + size_t distances_size = distances_.size(); + CHECK(writer->Write(&distances_size, sizeof(size_t))); if (distances_.size() > 0) { - CHECK( - writer->Write(distances_.data(), distances_.size() * sizeof(int8_t))); + CHECK(writer->Write(const_cast(distances_.data()), + distances_size * sizeof(int8_t))); } } template void Deserialize(std::unique_ptr& reader) { - id_indexer_impl::KeyBuffer::deserialize(reader, keys_); - OutArchive arc; - CHECK(reader->ReadArchive(arc)); - size_t mod_function_index; - size_t indices_size, distances_size; - arc >> mod_function_index >> max_lookups_ >> num_elements_ >> - num_slots_minus_one_ >> indices_size >> distances_size; - arc.Clear(); + keys_.deserialize(reader); + size_t mod_function_index; + CHECK(reader->Read(&mod_function_index, sizeof(size_t))); hash_policy_.set_mod_function_by_index(mod_function_index); + CHECK(reader->Read(&max_lookups_, sizeof(int8_t))); + CHECK(reader->Read(&num_elements_, sizeof(size_t))); + CHECK(reader->Read(&num_slots_minus_one_, sizeof(size_t))); + + size_t indices_size; + CHECK(reader->Read(&indices_size, sizeof(size_t))); indices_.resize(indices_size); - distances_.resize(distances_size); if (indices_size > 0) { CHECK(reader->Read(indices_.data(), indices_.size() * sizeof(INDEX_T))); } + + size_t distances_size; + CHECK(reader->Read(&distances_size, sizeof(size_t))); + distances_.resize(distances_size); if (distances_size > 0) { CHECK( reader->Read(distances_.data(), distances_.size() * sizeof(int8_t))); } } + void 
serialize_to_mem(std::vector& buf) const { + keys_.serialize_to_mem(buf); + size_t mod_function_index = hash_policy_.get_mod_function_index(); + encode_val(mod_function_index, buf); + encode_val(max_lookups_, buf); + encode_val(num_elements_, buf); + encode_val(num_slots_minus_one_, buf); + + encode_vec(indices_, buf); + encode_vec(distances_, buf); + } + private: void emplace(INDEX_T lid) { - KEY_T key = keys_[lid]; + KEY_T key = keys_.get(lid); size_t index = hash_policy_.index_for_hash(hasher_(key), num_slots_minus_one_); int8_t distance_from_desired = 0; @@ -484,7 +642,7 @@ class IdIndexer { } static int8_t compute_max_lookups(size_t num_buckets) { - int8_t desired = id_indexer_impl::log2(num_buckets); + int8_t desired = ska::detailv3::log2(num_buckets); return std::max(id_indexer_impl::min_lookups, desired); } @@ -503,6 +661,91 @@ class IdIndexer { std::hash hasher_; }; +template +class IdIndexerView { + public: + IdIndexerView() : hasher_() {} + ~IdIndexerView() = default; + + void Init(const void* data, size_t size) { + const char* ptr = reinterpret_cast(data); + size_t cur = keys_.init(ptr, size); + ptr += cur; + + size_t mod_function_index; + ptr = decode_val(mod_function_index, ptr); + hash_policy_.set_mod_function_by_index(mod_function_index); + + ptr = decode_val(max_lookups_, ptr); + ptr = decode_val(num_elements_, ptr); + ptr = decode_val(num_slots_minus_one_, ptr); + + size_t used_size = ptr - reinterpret_cast(data); + size -= used_size; + + cur = indices_.init(ptr, size); + ptr += cur; + size -= cur; + + distances_.init(ptr, size); + } + + size_t entry_num() const { return distances_.size(); } + + size_t bucket_count() const { + return num_slots_minus_one_ ? 
num_slots_minus_one_ + 1 : 0; + } + + size_t size() const { return num_elements_; } + + bool get_key(INDEX_T lid, KEY_T& oid) const { + if (lid >= num_elements_) { + return false; + } + oid = keys_.get(lid); + return true; + } + + bool get_index(const KEY_T& oid, INDEX_T& lid) const { + size_t index = + hash_policy_.index_for_hash(hasher_(oid), num_slots_minus_one_); + for (int8_t distance = 0; distances_.get(index) >= distance; + ++distance, ++index) { + INDEX_T ret = indices_.get(index); + if (keys_.get(ret) == oid) { + lid = ret; + return true; + } + } + return false; + } + + bool _get_index(const KEY_T& oid, size_t hash, INDEX_T& lid) const { + size_t index = hash_policy_.index_for_hash(hash, num_slots_minus_one_); + for (int8_t distance = 0; distances_.get(index) >= distance; + ++distance, ++index) { + INDEX_T ret = indices_.get(index); + if (keys_.get(ret) == oid) { + lid = ret; + return true; + } + } + return false; + } + + private: + typename id_indexer_impl::KeyBufferView keys_; + ref_vector indices_; + ref_vector distances_; + + ska::ska::prime_number_hash_policy hash_policy_; + int8_t max_lookups_ = id_indexer_impl::min_lookups - 1; + size_t num_elements_ = 0; + size_t num_slots_minus_one_ = 0; + + std::hash hasher_; +}; + namespace sync_comm { template @@ -513,8 +756,7 @@ struct CommImpl> { arc << indexer.hash_policy_.get_mod_function_index() << indexer.max_lookups_ << indexer.num_elements_ << indexer.num_slots_minus_one_; Send(arc, dst_worker_id, tag, comm); - id_indexer_impl::KeyBuffer::SendTo(indexer.keys_, dst_worker_id, tag, - comm); + Send(indexer.keys_, dst_worker_id, tag, comm); Send(indexer.indices_, dst_worker_id, tag, comm); Send(indexer.distances_, dst_worker_id, tag, comm); } @@ -527,8 +769,7 @@ struct CommImpl> { arc >> mod_function_index >> indexer.max_lookups_ >> indexer.num_elements_ >> indexer.num_slots_minus_one_; indexer.hash_policy_.set_mod_function_by_index(mod_function_index); - id_indexer_impl::KeyBuffer::RecvFrom(indexer.keys_, 
src_worker_id, - tag, comm); + Recv(indexer.keys_, src_worker_id, tag, comm); Recv(indexer.indices_, src_worker_id, tag, comm); Recv(indexer.distances_, src_worker_id, tag, comm); } diff --git a/grape/graph/immutable_csr.h b/grape/graph/immutable_csr.h index b0e73932..3a18a202 100644 --- a/grape/graph/immutable_csr.h +++ b/grape/graph/immutable_csr.h @@ -184,6 +184,13 @@ class ImmutableCSR { Array>& get_edges_mut() { return edges_; } Array>& get_offsets_mut() { return offsets_; } + size_t memory_usage() const { + size_t mem = sizeof(*this); + mem += offsets_.size() * sizeof(nbr_t*); + mem += edges_.size() * sizeof(nbr_t); + return mem; + } + template void Serialize(std::unique_ptr& writer) { vid_t vnum = vertex_num(); diff --git a/grape/graph/mutable_csr.h b/grape/graph/mutable_csr.h index 77c6c06c..ba952c8c 100644 --- a/grape/graph/mutable_csr.h +++ b/grape/graph/mutable_csr.h @@ -552,20 +552,24 @@ class MutableCSR> { if (std::is_pod::value) { for (vid_t i = 0; i < vnum; ++i) { - CHECK(writer->Write(adj_lists_[i].begin, - adj_lists_[i].degree() * sizeof(nbr_t))); + if (degree[i] > 0) { + CHECK(writer->Write(adj_lists_[i].begin, + adj_lists_[i].degree() * sizeof(nbr_t))); + } } } else { - for (vid_t i = 0; i < vnum; ++i) { - auto ptr = adj_lists_[i].begin; - auto end = adj_lists_[i].end; - while (ptr != end) { - ia << *ptr; - ++ptr; + if (edge_num > 0) { + for (vid_t i = 0; i < vnum; ++i) { + auto ptr = adj_lists_[i].begin; + auto end = adj_lists_[i].end; + while (ptr != end) { + ia << *ptr; + ++ptr; + } } + CHECK(writer->WriteArchive(ia)); + ia.Clear(); } - CHECK(writer->WriteArchive(ia)); - ia.Clear(); } } @@ -600,23 +604,30 @@ class MutableCSR> { adj_lists_[i].end = ptr + degree_list[i]; ptr += capacity_[i]; } - prev_[0] = sentinel; - next_[vnum - 1] = sentinel; + if (vnum > 0) { + prev_[0] = sentinel; + next_[vnum - 1] = sentinel; + } + if (std::is_pod::value) { for (vid_t i = 0; i < vnum; ++i) { - CHECK(reader->Read(ptr, sizeof(nbr_t) * degree_list[i])); + if 
(degree_list[i] > 0) { + CHECK(reader->Read(ptr, sizeof(nbr_t) * degree_list[i])); + } } } else { - CHECK(reader->ReadArchive(oa)); - for (vid_t i = 0; i < vnum; ++i) { - nbr_t* begin = adj_lists_[i].begin; - nbr_t* end = adj_lists_[i].end; - while (begin != end) { - oa >> *begin; - ++begin; + if (edge_num > 0) { + CHECK(reader->ReadArchive(oa)); + for (vid_t i = 0; i < vnum; ++i) { + nbr_t* begin = adj_lists_[i].begin; + nbr_t* end = adj_lists_[i].end; + while (begin != end) { + oa >> *begin; + ++begin; + } } + oa.Clear(); } - oa.Clear(); } buffers_.emplace_back(std::move(buffer)); } diff --git a/grape/types.h b/grape/types.h index 4035a1b7..34138683 100644 --- a/grape/types.h +++ b/grape/types.h @@ -20,6 +20,8 @@ limitations under the License. #include #include +#include "pthash/utils/hasher.hpp" + // Use the same setting with apache-arrow to avoid possible conflicts #define nssv_CONFIG_SELECT_STRING_VIEW nssv_STRING_VIEW_NONSTD #include "string_view/string_view.hpp" @@ -138,6 +140,31 @@ struct InternalOID { static std::string FromInternal(const type& val) { return std::string(val); } }; +struct murmurhasher { + typedef pthash::hash64 hash_type; + + // specialization for std::string + static inline hash_type hash(std::string const& val, uint64_t seed) { + return pthash::MurmurHash2_64(val.data(), val.size(), seed); + } + + // specialization for uint64_t + static inline hash_type hash(uint64_t val, uint64_t seed) { + return pthash::MurmurHash2_64(reinterpret_cast(&val), + sizeof(val), seed); + } + + static inline hash_type hash(const nonstd::string_view& val, uint64_t seed) { + return pthash::MurmurHash2_64(val.data(), val.size(), seed); + } + +#if __cplusplus >= 201703L + static inline hash_type hash(std::string_view const& val, uint64_t seed) { + return pthash::MurmurHash2_64(val.data(), val.size(), seed); + } +#endif +}; + #ifdef __cpp_lib_is_invocable template using result_of_t = std::invoke_result_t; diff --git a/grape/util.h b/grape/util.h index 
55798af5..5145f1ea 100644 --- a/grape/util.h +++ b/grape/util.h @@ -22,6 +22,7 @@ limitations under the License. #endif #endif +#include #include #include #include @@ -29,6 +30,7 @@ limitations under the License. #include #include #include +#include #include #include #include @@ -140,11 +142,41 @@ struct IdenticalHasher { static uint64_t hash(uint64_t x) { return x; } }; -static inline bool exists_file(const std::string& name) { +inline bool exists_file(const std::string& name) { struct stat buffer; return (stat(name.c_str(), &buffer) == 0); } +inline std::string get_absolute_path(const std::string& path) { + char abs_path[PATH_MAX]; + if (realpath(path.c_str(), abs_path) == nullptr) { + LOG(ERROR) << "Failed to get absolute path for " << path; + return ""; + } + return std::string(abs_path); +} + +inline bool create_directories(const std::string& path) { + char temp_path[256]; + snprintf(temp_path, sizeof(temp_path), "%s", path.c_str()); + + for (char* p = temp_path + 1; *p; ++p) { + if (*p == '/') { + *p = '\0'; + if (mkdir(temp_path, 0755) != 0 && errno != EEXIST) { + std::cerr << "Error creating directory: " << temp_path << std::endl; + return false; + } + *p = '/'; + } + } + if (mkdir(temp_path, 0755) != 0 && errno != EEXIST) { + std::cerr << "Error creating directory: " << temp_path << std::endl; + return false; + } + return true; +} + inline std::vector split_string(const std::string& str, char delimiter) { std::vector tokens; diff --git a/grape/utils/concurrent_queue.h b/grape/utils/concurrent_queue.h index 938e0758..a0312daf 100644 --- a/grape/utils/concurrent_queue.h +++ b/grape/utils/concurrent_queue.h @@ -136,6 +136,17 @@ class BlockingQueue { } } + bool TryGetAll(std::deque& items) { + { + std::unique_lock lk(lock_); + if (!queue_.empty()) { + std::swap(items, queue_); + full_.notify_all(); + } + return (producer_num_ != 0); + } + } + size_t Size() const { return queue_.size(); } private: @@ -156,8 +167,7 @@ class SpinLock { public: void lock() { while 
(locked.test_and_set(std::memory_order_acquire)) { - { - } + {} } } diff --git a/grape/utils/pthash_utils/ef_sequence_view.h b/grape/utils/pthash_utils/ef_sequence_view.h new file mode 100644 index 00000000..0a56d700 --- /dev/null +++ b/grape/utils/pthash_utils/ef_sequence_view.h @@ -0,0 +1,149 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef GRAPE_UTILS_PTHASH_UTILS_EF_SEQUENCE_VIEW_H_ +#define GRAPE_UTILS_PTHASH_UTILS_EF_SEQUENCE_VIEW_H_ + +#include +#include +#include + +#include + +#include "grape/utils/ref_vector.h" +#include "pthash/encoders/util.hpp" + +namespace grape { + +// This code is an adaptation from +// https://github.com/jermp/pthash/blob/master/include/encoders/bit_vector.hpp +struct bit_vector_view { + const uint64_t* data() const { return m_bits.data(); } + + template + void load(Loader& loader) { + loader.load(m_size); + loader.load_ref_vec(m_bits); + } + + size_t m_size; + ref_vector m_bits; +}; + +// This code is an adaptation from +// https://github.com/jermp/pthash/blob/master/include/encoders/darray.hpp +struct darray1_view { + inline uint64_t select(const bit_vector_view& bv, uint64_t idx) const { + assert(idx < m_positions); + uint64_t block = idx / block_size; + int64_t block_pos = m_block_inventory[block]; + if (block_pos < 0) { // sparse super-block + uint64_t overflow_pos = uint64_t(-block_pos - 1); + return 
m_overflow_positions[overflow_pos + (idx & (block_size - 1))]; + } + + size_t subblock = idx / subblock_size; + size_t start_pos = uint64_t(block_pos) + m_subblock_inventory[subblock]; + size_t reminder = idx & (subblock_size - 1); + if (!reminder) { + return start_pos; + } + + const uint64_t* data = bv.data(); + size_t word_idx = start_pos >> 6; + size_t word_shift = start_pos & 63; + uint64_t word = data[word_idx] & (uint64_t(-1) << word_shift); + + while (true) { + size_t popcnt = pthash::util::popcount(word); + if (reminder < popcnt) { + break; + } + reminder -= popcnt; + word = data[++word_idx]; + } + + return (word_idx << 6) + pthash::util::select_in_word(word, reminder); + } + + template + void load(Loader& loader) { + loader.load(m_positions); + loader.load_ref_vec(m_block_inventory); + loader.load_ref_vec(m_subblock_inventory); + loader.load_ref_vec(m_overflow_positions); + } + + static const size_t block_size = 1024; // 2048 + static const size_t subblock_size = 32; + static const size_t max_in_block_distance = 1 << 16; + + size_t m_positions; + ref_vector m_block_inventory; + ref_vector m_subblock_inventory; + ref_vector m_overflow_positions; +}; + +// This code is an adaptation from +// https://github.com/jermp/pthash/blob/master/include/encoders/compact_vector.hpp +struct compact_vector_view { + inline uint64_t size() const { return m_size; } + inline uint64_t width() const { return m_width; } + inline uint64_t access(uint64_t pos) const { + assert(pos < size()); + uint64_t i = pos * m_width; + const char* ptr = reinterpret_cast(m_bits.data()); + return (*(reinterpret_cast(ptr + (i >> 3))) >> (i & 7)) & + m_mask; + } + + template + void load(Loader& loader) { + loader.load(m_size); + loader.load(m_width); + loader.load(m_mask); + loader.load_ref_vec(m_bits); + } + + uint64_t m_size; + uint64_t m_width; + uint64_t m_mask; + ref_vector m_bits; +}; + +// This code is an adaptation from +// 
https://github.com/jermp/pthash/blob/master/include/encoders/ef_sequence.hpp +struct ef_sequence_view { + uint64_t access(uint64_t i) const { + assert(i < m_low_bits.size()); + return ((m_high_bits_d1.select(m_high_bits, i) - i) << m_low_bits.width()) | + m_low_bits.access(i); + } + + template + void load(Loader& loader) { + m_high_bits.load(loader); + m_high_bits_d1.load(loader); + m_low_bits.load(loader); + } + + bit_vector_view m_high_bits; + darray1_view m_high_bits_d1; + compact_vector_view m_low_bits; +}; + +} // namespace grape + +#endif // GRAPE_UTILS_PTHASH_UTILS_EF_SEQUENCE_VIEW_H_ diff --git a/grape/utils/pthash_utils/encoders_view.h b/grape/utils/pthash_utils/encoders_view.h new file mode 100644 index 00000000..85615c4f --- /dev/null +++ b/grape/utils/pthash_utils/encoders_view.h @@ -0,0 +1,62 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef GRAPE_UTILS_PTHASH_UTILS_ENCODERS_VIEW_H_ +#define GRAPE_UTILS_PTHASH_UTILS_ENCODERS_VIEW_H_ + +#include "grape/utils/pthash_utils/ef_sequence_view.h" + +namespace grape { + +// This code is an adaptation from +// https://github.com/jermp/pthash/blob/master/include/encoders/encoders.hpp +struct dictionary_view { + size_t size() const { return m_ranks.size(); } + uint64_t access(uint64_t i) const { + uint64_t rank = m_ranks.access(i); + return m_dict.access(rank); + } + + template + void load(Loader& loader) { + m_ranks.load(loader); + m_dict.load(loader); + } + + compact_vector_view m_ranks; + compact_vector_view m_dict; +}; + +struct dual_dictionary_view { + uint64_t access(uint64_t i) const { + if (i < m_front.size()) { + return m_front.access(i); + } + return m_back.access(i - m_front.size()); + } + + template + void load(Loader& loader) { + m_front.load(loader); + m_back.load(loader); + } + + dictionary_view m_front; + dictionary_view m_back; +}; + +} // namespace grape + +#endif // GRAPE_UTILS_PTHASH_UTILS_ENCODERS_VIEW_H_ diff --git a/grape/utils/pthash_utils/ph_indexer_view.h b/grape/utils/pthash_utils/ph_indexer_view.h new file mode 100644 index 00000000..101ede47 --- /dev/null +++ b/grape/utils/pthash_utils/ph_indexer_view.h @@ -0,0 +1,81 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +#ifndef GRAPE_UTILS_PTHASH_UTILS_PH_INDEXER_VIEW_H_ +#define GRAPE_UTILS_PTHASH_UTILS_PH_INDEXER_VIEW_H_ + +#include "grape/graph/id_indexer.h" +#include "grape/utils/pthash_utils/single_phf_view.h" +#include "grape/utils/ref_vector.h" + +namespace grape { + +template +class PHIndexerView { + public: + PHIndexerView() {} + ~PHIndexerView() {} + + void init(const void* buffer, size_t size) { + buffer_ = buffer; + buffer_size_ = size; + + mem_loader loader(reinterpret_cast(buffer), size); + phf_view_.load(loader); + keys_view_.load(loader); + } + + size_t entry_num() const { return keys_view_.size(); } + + bool empty() const { return keys_view_.empty(); } + + bool get_key(INDEX_T lid, KEY_T& oid) const { + if (lid >= keys_view_.size()) { + return false; + } + oid = keys_view_.get(lid); + return true; + } + + bool get_index(const KEY_T& oid, INDEX_T& lid) const { + auto idx = phf_view_(oid); + if (idx < keys_view_.size() && keys_view_.get(idx) == oid) { + lid = idx; + return true; + } + return false; + } + + size_t size() const { return keys_view_.size(); } + + template + void Serialize(std::unique_ptr& writer) { + writer->Write(&buffer_size_, sizeof(size_t)); + if (buffer_size_ > 0) { + writer->Write(const_cast(buffer_), buffer_size_); + } + } + + private: + SinglePHFView phf_view_; + id_indexer_impl::KeyBufferView keys_view_; + + const void* buffer_; + size_t buffer_size_; +}; + +} // namespace grape + +#endif // GRAPE_UTILS_PTHASH_UTILS_PH_INDEXER_VIEW_H_ diff --git a/grape/utils/pthash_utils/single_phf_view.h b/grape/utils/pthash_utils/single_phf_view.h new file mode 100644 index 00000000..e25aad68 --- /dev/null +++ b/grape/utils/pthash_utils/single_phf_view.h @@ -0,0 +1,218 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef GRAPE_UTILS_PTHASH_UTILS_SINGLE_PHF_VIEW_H_ +#define GRAPE_UTILS_PTHASH_UTILS_SINGLE_PHF_VIEW_H_ + +#include "grape/utils/pthash_utils/encoders_view.h" +#include "pthash/builders/util.hpp" +#include "pthash/encoders/encoders.hpp" +#include "pthash/single_phf.hpp" +#include "pthash/utils/bucketers.hpp" +#include "pthash/utils/hasher.hpp" + +namespace grape { + +struct mem_dumper { + public: + mem_dumper() = default; + ~mem_dumper() = default; + + template + void dump(const T& val) { + static_assert(std::is_pod::value, "T must be POD type"); + const char* ptr = reinterpret_cast(&val); + buf_.insert(buf_.end(), ptr, ptr + sizeof(T)); + } + + template + void dump_vec(const std::vector& vec) { + static_assert(std::is_pod::value, "T must be POD type"); + size_t n = vec.size(); + dump(n); + const char* ptr = reinterpret_cast(vec.data()); + buf_.insert(buf_.end(), ptr, ptr + sizeof(T) * n); + } + + const std::vector& buffer() const { return buf_; } + std::vector& buffer() { return buf_; } + + size_t size() const { return buf_.size(); } + + private: + std::vector buf_; +}; + +struct external_mem_dumper { + public: + external_mem_dumper(void* buf, size_t size) : buf_(buf), size_(size) {} + + ~external_mem_dumper() = default; + + template + void dump(const T& val) { + static_assert(std::is_pod::value, "T must be POD type"); + const char* ptr = reinterpret_cast(&val); + if (pos_ + sizeof(T) > size_) { + return; + } + memcpy(reinterpret_cast(buf_) + pos_, ptr, sizeof(T)); + pos_ += sizeof(T); + } + + template + void dump_vec(const std::vector& 
vec) { + static_assert(std::is_pod::value, "T must be POD type"); + size_t n = vec.size(); + if (pos_ + sizeof(T) * n + sizeof(size_t) > size_) { + return; + } + dump(n); + const char* ptr = reinterpret_cast(vec.data()); + memcpy(reinterpret_cast(buf_) + pos_, ptr, sizeof(T) * n); + pos_ += sizeof(T) * n; + } + + const void* buffer() const { return buf_; } + + size_t size() const { return size_; } + + private: + void* buf_ = nullptr; + size_t pos_ = 0; + size_t size_ = 0; +}; + +struct mem_loader { + public: + mem_loader(const char* buf, size_t size) + : begin_(buf), ptr_(buf), end_(buf + size) {} + ~mem_loader() = default; + + template + void load(T& val) { + memcpy(&val, ptr_, sizeof(T)); + ptr_ += sizeof(T); + } + + template + void load_vec(std::vector& vec) { + static_assert(std::is_pod::value, "T must be POD type"); + size_t n; + load(n); + vec.resize(n); + memcpy(vec.data(), ptr_, n * sizeof(T)); + ptr_ += (n * sizeof(T)); + } + + template + void load_ref_vec(ref_vector& vec) { + ptr_ += vec.init(ptr_, end_ - ptr_); + } + + const char* data() const { return ptr_; } + size_t remaining() const { return end_ - ptr_; } + size_t used() const { return ptr_ - begin_; } + + private: + const char* begin_; + const char* ptr_; + const char* end_; +}; + +// This code is an adaptation from +// https://github.com/jermp/pthash/blob/master/include/single_phf.hpp +template +struct SinglePHFView { + public: + SinglePHFView() = default; + ~SinglePHFView() = default; + + template + uint64_t operator()(T const& key) const { + auto hash = Hasher::hash(key, m_seed); + return position(hash); + } + + uint64_t position(typename Hasher::hash_type hash) const { + uint64_t bucket = m_bucketer.bucket(hash.first()); + uint64_t pilot = m_pilots.access(bucket); + uint64_t hashed_pilot = pthash::default_hash64(pilot, m_seed); + uint64_t p = + fastmod::fastmod_u64(hash.second() ^ hashed_pilot, m_M, m_table_size); + if (PTHASH_LIKELY(p < m_num_keys)) + return p; + return 
m_free_slots.access(p - m_num_keys); + } + + template + void load(Loader& loader) { + loader.load(m_seed); + loader.load(m_num_keys); + loader.load(m_table_size); + loader.load(m_M); + m_bucketer.load(loader); + m_pilots.load(loader); + m_free_slots.load(loader); + } + + template + static void build(Iterator keys, uint64_t n, Dumper& dumper, int thread_num) { + pthash::build_configuration config; + config.c = 7.0; + config.alpha = 0.94; + config.num_threads = thread_num; + config.minimal_output = true; + config.verbose_output = false; + + pthash::single_phf phf; + phf.build_in_internal_memory(keys, n, config); + std::set idx; + for (uint64_t k = 0; k < n; ++k) { + idx.insert(phf(*keys)); + ++keys; + } + phf.dump(dumper); + } + + template + static void build( + Iterator keys, uint64_t n, + pthash::single_phf& + phf, + int thread_num) { + pthash::build_configuration config; + config.c = 7.0; + config.alpha = 0.94; + config.num_threads = thread_num; + config.minimal_output = true; + config.verbose_output = false; + + phf.build_in_internal_memory(keys, n, config); + } + + private: + uint64_t m_seed; + uint64_t m_num_keys; + uint64_t m_table_size; + __uint128_t m_M; + pthash::skew_bucketer m_bucketer; + dual_dictionary_view m_pilots; + ef_sequence_view m_free_slots; +}; + +} // namespace grape + +#endif // GRAPE_UTILS_PTHASH_UTILS_SINGLE_PHF_VIEW_H_ diff --git a/grape/utils/ref_vector.h b/grape/utils/ref_vector.h new file mode 100644 index 00000000..b0233553 --- /dev/null +++ b/grape/utils/ref_vector.h @@ -0,0 +1,85 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef GRAPE_UTILS_REF_VECTOR_H_ +#define GRAPE_UTILS_REF_VECTOR_H_ + +#include +#include + +#include "grape/types.h" + +namespace grape { + +template +struct ref_vector { + static_assert(std::is_pod::value, "T must be POD type"); + ref_vector() : buffer_(nullptr), size_(0) {} + ~ref_vector() {} + + size_t init(const void* buffer, size_t size) { + const void* ptr = buffer; + size_ = *reinterpret_cast(ptr); + ptr = reinterpret_cast(ptr) + sizeof(size_t); + buffer_ = reinterpret_cast(ptr); + return size_ * sizeof(T) + sizeof(size_t); + } + + size_t size() const { return size_; } + + T get(size_t idx) const { return buffer_[idx]; } + + const T* data() const { return buffer_; } + + const T& operator[](size_t idx) const { return buffer_[idx]; } + + template + void load(Loader& loader) { + loader.load_ref_vec(*this); + } + + private: + const T* buffer_; + size_t size_; +}; + +template +void encode_vec(const std::vector& vec, std::vector& buf) { + size_t old_size = buf.size(); + size_t vec_size = vec.size(); + buf.resize(old_size + sizeof(size_t) + vec_size * sizeof(T)); + char* ptr = buf.data() + old_size; + memcpy(ptr, &vec_size, sizeof(size_t)); + ptr += sizeof(size_t); + memcpy(ptr, vec.data(), sizeof(T) * vec_size); +} + +template +void encode_val(const T& val, std::vector& buf) { + size_t old_size = buf.size(); + buf.resize(old_size + sizeof(T)); + char* ptr = buf.data() + old_size; + memcpy(ptr, &val, sizeof(T)); +} + +template +const char* decode_val(T& val, const char* buf) { + memcpy(&val, buf, sizeof(T)); + return buf + sizeof(T); +} + 
+} // namespace grape + +#endif // GRAPE_UTILS_REF_VECTOR_H_ diff --git a/grape/utils/string_view_vector.h b/grape/utils/string_view_vector.h index 7adef9ce..5c13804d 100644 --- a/grape/utils/string_view_vector.h +++ b/grape/utils/string_view_vector.h @@ -19,9 +19,11 @@ limitations under the License. #include #include +#include #include #include "grape/types.h" +#include "grape/utils/ref_vector.h" namespace grape { @@ -74,11 +76,136 @@ class StringViewVector { offsets_.swap(rhs.offsets_); } + template + void serialize(std::unique_ptr& writer) const { + size_t content_buffer_size = content_buffer().size(); + CHECK(writer->Write(&content_buffer_size, sizeof(size_t))); + if (content_buffer_size > 0) { + CHECK(writer->Write(const_cast(content_buffer().data()), + content_buffer_size * sizeof(char))); + } + size_t offset_buffer_size = offset_buffer().size(); + CHECK(writer->Write(&offset_buffer_size, sizeof(size_t))); + if (offset_buffer_size > 0) { + CHECK(writer->Write(const_cast(offset_buffer().data()), + offset_buffer_size * sizeof(size_t))); + } + } + + template + void deserialize(std::unique_ptr& reader) { + size_t content_buffer_size; + CHECK(reader->Read(&content_buffer_size, sizeof(size_t))); + if (content_buffer_size > 0) { + content_buffer().resize(content_buffer_size); + CHECK(reader->Read(content_buffer().data(), + content_buffer_size * sizeof(char))); + } + size_t offset_buffer_size; + CHECK(reader->Read(&offset_buffer_size, sizeof(size_t))); + if (offset_buffer_size > 0) { + offset_buffer().resize(offset_buffer_size); + CHECK(reader->Read(offset_buffer().data(), + offset_buffer_size * sizeof(size_t))); + } + } + + void serialize_to_mem(std::vector& buf) const { + encode_vec(buffer_, buf); + encode_vec(offsets_, buf); + } + private: std::vector buffer_; std::vector offsets_; }; +template <> +struct ref_vector { + ref_vector() {} + ~ref_vector() {} + + size_t init(const void* buffer, size_t size) { + size_t buffer_size = buffer_.init(buffer, size); + const 
void* ptr = reinterpret_cast(buffer) + buffer_size; + size_t offset_size = offsets_.init(ptr, size - buffer_size); + return buffer_size + offset_size; + } + + ref_vector& buffer() { return buffer_; } + ref_vector& offsets() { return offsets_; } + + const ref_vector& buffer() const { return buffer_; } + const ref_vector& offsets() const { return offsets_; } + + size_t size() const { + if (offsets_.size() == 0) { + return 0; + } + return offsets_.size() - 1; + } + + nonstd::string_view get(size_t idx) const { + size_t from = offsets_.get(idx); + size_t to = offsets_.get(idx + 1); + return nonstd::string_view(buffer_.data() + from, to - from); + } + + template + void load(Loader& loader) { + loader.load_ref_vec(buffer_); + loader.load_ref_vec(offsets_); + } + + private: + ref_vector buffer_; + ref_vector offsets_; +}; + +#if __cplusplus >= 201703L +template <> +struct ref_vector { + ref_vector() {} + ~ref_vector() {} + + size_t init(const void* buffer, size_t size) { + size_t buffer_size = buffer_.init(buffer, size); + const void* ptr = reinterpret_cast(buffer) + buffer_size; + size_t offset_size = offsets_.init(ptr, size - buffer_size); + return buffer_size + offset_size; + } + + ref_vector& buffer() { return buffer_; } + ref_vector& offsets() { return offsets_; } + + const ref_vector& buffer() const { return buffer_; } + const ref_vector& offsets() const { return offsets_; } + + size_t size() const { + if (offsets_.size() == 0) { + return 0; + } + return offsets_.size() - 1; + } + + std::string_view get(size_t idx) const { + size_t from = offsets_.get(idx); + size_t to = offsets_.get(idx + 1); + return std::string_view(buffer_.data() + from, to - from); + } + + template + void load(Loader& loader) { + loader.load_ref_vec(buffer_); + loader.load_ref_vec(offsets_); + } + + private: + ref_vector buffer_; + ref_vector offsets_; +}; +#endif + } // namespace grape #endif // GRAPE_UTILS_STRING_VIEW_VECTOR_H_ diff --git a/grape/vertex_map/global_vertex_map.h 
b/grape/vertex_map/global_vertex_map.h deleted file mode 100644 index 1a5a468b..00000000 --- a/grape/vertex_map/global_vertex_map.h +++ /dev/null @@ -1,318 +0,0 @@ -/** Copyright 2020 Alibaba Group Holding Limited. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -#ifndef GRAPE_VERTEX_MAP_GLOBAL_VERTEX_MAP_H_ -#define GRAPE_VERTEX_MAP_GLOBAL_VERTEX_MAP_H_ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "grape/config.h" -#include "grape/fragment/partitioner.h" -#include "grape/graph/id_indexer.h" -#include "grape/serialization/in_archive.h" -#include "grape/serialization/out_archive.h" -#include "grape/vertex_map/vertex_map_base.h" -#include "grape/worker/comm_spec.h" - -namespace grape { - -template -class GlobalVertexMap; - -template -class GlobalVertexMapBuilder { - using internal_oid_t = typename InternalOID::type; - - private: - GlobalVertexMapBuilder(fid_t fid, IdIndexer& indexer, - const PARTITIONER_T& partitioner, - const IdParser& id_parser) - : fid_(fid), - indexer_(indexer), - partitioner_(partitioner), - id_parser_(id_parser) {} - - public: - ~GlobalVertexMapBuilder() {} - - void add_local_vertex(const internal_oid_t& id, VID_T& gid) { - assert(partitioner_.GetPartitionId(id) == fid_); - indexer_.add(id, gid); - id_parser_.generate_global_id(fid_, gid); - } - - void add_vertex(const internal_oid_t& id) { - if (partitioner_.GetPartitionId(id) == fid_) { - indexer_._add(id); - } - } - - void finish(GlobalVertexMap& vertex_map) { 
- const CommSpec& comm_spec = vertex_map.GetCommSpec(); - int worker_id = comm_spec.worker_id(); - int worker_num = comm_spec.worker_num(); - fid_t fnum = comm_spec.fnum(); - { - std::thread recv_thread([&]() { - int src_worker_id = (worker_id + 1) % worker_num; - while (src_worker_id != worker_id) { - for (fid_t fid = 0; fid < fnum; ++fid) { - if (comm_spec.FragToWorker(fid) != src_worker_id) { - continue; - } - sync_comm::Recv(vertex_map.indexers_[fid], src_worker_id, 0, - comm_spec.comm()); - } - src_worker_id = (src_worker_id + 1) % worker_num; - } - }); - std::thread send_thread([&]() { - int dst_worker_id = (worker_id + worker_num - 1) % worker_num; - while (dst_worker_id != worker_id) { - for (fid_t fid = 0; fid < fnum; ++fid) { - if (comm_spec.FragToWorker(fid) != worker_id) { - continue; - } - sync_comm::Send(indexer_, dst_worker_id, 0, comm_spec.comm()); - } - dst_worker_id = (dst_worker_id + worker_num - 1) % worker_num; - } - }); - send_thread.join(); - recv_thread.join(); - } - } - - private: - template - friend class GlobalVertexMap; - - fid_t fid_; - IdIndexer& indexer_; - const PARTITIONER_T& partitioner_; - const IdParser& id_parser_; -}; - -/** - * @brief a kind of VertexMapBase which holds global mapping information in - * each worker. 
- * - * @tparam OID_T - * @tparam VID_T - */ -template > -class GlobalVertexMap : public VertexMapBase { - // TODO(lxj): to support shared-memory for workers on same host (auto apps) - - using base_t = VertexMapBase; - using internal_oid_t = typename InternalOID::type; - - public: - explicit GlobalVertexMap(const CommSpec& comm_spec) : base_t(comm_spec) {} - ~GlobalVertexMap() = default; - void Init() { indexers_.resize(comm_spec_.fnum()); } - - size_t GetTotalVertexSize() const { - size_t size = 0; - for (const auto& v : indexers_) { - size += v.size(); - } - return size; - } - - size_t GetInnerVertexSize(fid_t fid) const { return indexers_[fid].size(); } - void AddVertex(const OID_T& oid) { - fid_t fid = partitioner_.GetPartitionId(oid); - indexers_[fid]._add(oid); - } - - using base_t::Lid2Gid; - bool AddVertex(const OID_T& oid, VID_T& gid) { - fid_t fid = partitioner_.GetPartitionId(oid); - internal_oid_t internal_oid(oid); - if (indexers_[fid].add(std::move(internal_oid), gid)) { - gid = Lid2Gid(fid, gid); - return true; - } - gid = Lid2Gid(fid, gid); - return false; - } - - bool AddVertex(OID_T&& oid, VID_T& gid) { - fid_t fid = partitioner_.GetPartitionId(oid); - internal_oid_t internal_oid(std::move(oid)); - if (indexers_[fid].add(std::move(internal_oid), gid)) { - gid = Lid2Gid(fid, gid); - return true; - } - gid = Lid2Gid(fid, gid); - return false; - } - - using base_t::GetFidFromGid; - using base_t::GetLidFromGid; - bool GetOid(const VID_T& gid, OID_T& oid) const { - fid_t fid = GetFidFromGid(gid); - VID_T lid = GetLidFromGid(gid); - return GetOid(fid, lid, oid); - } - - bool GetOid(fid_t fid, const VID_T& lid, OID_T& oid) const { - internal_oid_t internal_oid; - if (indexers_[fid].get_key(lid, internal_oid)) { - oid = InternalOID::FromInternal(internal_oid); - return true; - } - return false; - } - - bool _GetGid(fid_t fid, const internal_oid_t& oid, VID_T& gid) const { - if (indexers_[fid].get_index(oid, gid)) { - gid = Lid2Gid(fid, gid); - return 
true; - } - return false; - } - - bool GetGid(fid_t fid, const OID_T& oid, VID_T& gid) const { - internal_oid_t internal_oid(oid); - return _GetGid(fid, internal_oid, gid); - } - - bool _GetGid(const internal_oid_t& oid, VID_T& gid) const { - fid_t fid = partitioner_.GetPartitionId(oid); - return _GetGid(fid, oid, gid); - } - - bool GetGid(const OID_T& oid, VID_T& gid) const { - fid_t fid = partitioner_.GetPartitionId(oid); - return GetGid(fid, oid, gid); - } - - GlobalVertexMapBuilder GetLocalBuilder() { - fid_t fid = comm_spec_.fid(); - return GlobalVertexMapBuilder( - fid, indexers_[fid], partitioner_, id_parser_); - } - - private: - template - void serialize(const std::string& path) { - auto io_adaptor = std::unique_ptr(new IOADAPTOR_T(path)); - io_adaptor->Open("wb"); - base_t::serialize(io_adaptor); - for (fid_t i = 0; i < comm_spec_.fnum(); ++i) { - indexers_[i].Serialize(io_adaptor); - } - io_adaptor->Close(); - } - - public: - template - void Serialize(const std::string& prefix) { - char fbuf[1024]; - snprintf(fbuf, sizeof(fbuf), "%s/%s", prefix.c_str(), - kSerializationVertexMapFilename); - std::string path = std::string(fbuf); - if (comm_spec_.worker_id() == 0) { - serialize(path); - } - MPI_Barrier(comm_spec_.comm()); - auto exists_file = [](const std::string& name) { - std::ifstream f(name.c_str()); - return f.good(); - }; - if (!exists_file(path) && comm_spec_.local_id() == 0) { - serialize(path); - } - MPI_Barrier(comm_spec_.comm()); - if (!exists_file(path)) { - serialize(path); - } - } - - template - void Deserialize(const std::string& prefix, fid_t fid) { - char fbuf[1024]; - snprintf(fbuf, sizeof(fbuf), "%s/%s", prefix.c_str(), - kSerializationVertexMapFilename); - - auto io_adaptor = - std::unique_ptr(new IOADAPTOR_T(std::string(fbuf))); - io_adaptor->Open(); - - base_t::deserialize(io_adaptor); - - indexers_.resize(comm_spec_.fnum()); - for (fid_t i = 0; i < comm_spec_.fnum(); ++i) { - indexers_[i].Deserialize(io_adaptor); - } - 
io_adaptor->Close(); - } - - void UpdateToBalance(std::vector& vnum_list, - std::vector>& gid_maps) { - fid_t fnum = comm_spec_.fnum(); - std::vector> oid_lists(fnum); - for (fid_t i = 0; i < fnum; ++i) { - oid_lists[i].resize(vnum_list[i]); - } - for (fid_t fid = 0; fid < fnum; ++fid) { - auto& old_indexer = indexers_[fid]; - VID_T vnum = old_indexer.size(); - for (VID_T i = 0; i < vnum; ++i) { - VID_T new_gid = gid_maps[fid][i]; - internal_oid_t oid; - fid_t new_fid = GetFidFromGid(new_gid); - CHECK(old_indexer.get_key(i, oid)); - if (new_fid != fid) { - OID_T id = InternalOID::FromInternal(oid); - partitioner_.SetPartitionId(id, new_fid); - } - VID_T new_lid = GetLidFromGid(new_gid); - oid_lists[new_fid][new_lid] = oid; - } - } - std::vector> new_indexers(fnum); - for (fid_t i = 0; i < fnum; ++i) { - auto& indexer = new_indexers[i]; - for (auto& oid : oid_lists[i]) { - indexer._add(oid); - } - } - std::swap(indexers_, new_indexers); - } - - private: - template - friend class GlobalVertexMapBuilder; - - std::vector> indexers_; - using base_t::comm_spec_; - using base_t::id_parser_; - using base_t::partitioner_; -}; - -} // namespace grape - -#endif // GRAPE_VERTEX_MAP_GLOBAL_VERTEX_MAP_H_ diff --git a/grape/vertex_map/idxers/hashmap_idxer.h b/grape/vertex_map/idxers/hashmap_idxer.h new file mode 100644 index 00000000..ae079f89 --- /dev/null +++ b/grape/vertex_map/idxers/hashmap_idxer.h @@ -0,0 +1,130 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +#ifndef GRAPE_VERTEX_MAP_IDXERS_HASHMAP_IDXER_H_ +#define GRAPE_VERTEX_MAP_IDXERS_HASHMAP_IDXER_H_ + +#include "grape/graph/id_indexer.h" +#include "grape/vertex_map/idxers/idxer_base.h" + +namespace grape { + +template +class HashMapIdxer : public IdxerBase { + using internal_oid_t = typename InternalOID::type; + + public: + HashMapIdxer() {} + explicit HashMapIdxer(IdIndexer&& indexer) + : indexer_(std::move(indexer)) {} + + bool get_key(VID_T vid, internal_oid_t& oid) const override { + return indexer_.get_key(vid, oid); + } + + bool get_index(const internal_oid_t& oid, VID_T& vid) const override { + return indexer_.get_index(oid, vid); + } + + IdxerType type() const override { return IdxerType::kHashMapIdxer; } + + void serialize(std::unique_ptr& writer) override { + indexer_.Serialize(writer); + } + void deserialize(std::unique_ptr& reader) override { + indexer_.Deserialize(reader); + } + + size_t size() const override { return indexer_.size(); } + + void add(const internal_oid_t& oid) { indexer_._add(oid); } + + size_t memory_usage() const override { return indexer_.memory_usage(); } + + private: + IdIndexer indexer_; +}; + +template +class HashMapIdxerDummyBuilder : public IdxerBuilderBase { + public: + using internal_oid_t = typename InternalOID::type; + void add(const internal_oid_t& oid) override {} + + std::unique_ptr> finish() override { + return std::unique_ptr>( + new HashMapIdxer(std::move(indexer_))); + } + + void sync_request(const CommSpec& comm_spec, int target, int tag) override { + int req_type = 0; + sync_comm::Send(req_type, target, tag, comm_spec.comm()); + sync_comm::Recv(indexer_, target, tag + 1, comm_spec.comm()); + } + void sync_response(const CommSpec& comm_spec, int source, int tag) override { + LOG(ERROR) + << "HashMapIdxerDummyBuilder should not be used to sync response"; + } + + private: + IdIndexer indexer_; +}; + +template 
+class HashMapIdxerBuilder : public IdxerBuilderBase { + public: + using internal_oid_t = typename InternalOID::type; + void add(const internal_oid_t& oid) override { indexer_._add(oid); } + + std::unique_ptr> finish() override { + return std::unique_ptr>( + new HashMapIdxer(std::move(indexer_))); + } + + void sync_request(const CommSpec& comm_spec, int target, int tag) override { + LOG(ERROR) << "HashMapIdxerBuilder should not be used to sync request"; + } + + void sync_response(const CommSpec& comm_spec, int source, int tag) override { + int req_type; + sync_comm::Recv(req_type, source, tag, comm_spec.comm()); + if (req_type == 0) { + // request all + sync_comm::Send(indexer_, source, tag + 1, comm_spec.comm()); + } else if (req_type == 1) { + // request partial + typename IdIndexer::key_buffer_t keys; + sync_comm::Recv(keys, source, tag, comm_spec.comm()); + std::vector response; + size_t keys_num = keys.size(); + for (size_t i = 0; i < keys_num; ++i) { + VID_T vid; + if (indexer_.get_index(keys.get(i), vid)) { + response.push_back(vid); + } else { + response.push_back(std::numeric_limits::max()); + } + } + sync_comm::Send(response, source, tag + 1, comm_spec.comm()); + } + } + + private: + IdIndexer indexer_; +}; + +} // namespace grape + +#endif // GRAPE_VERTEX_MAP_IDXERS_HASHMAP_IDXER_H_ diff --git a/grape/vertex_map/idxers/hashmap_idxer_view.h b/grape/vertex_map/idxers/hashmap_idxer_view.h new file mode 100644 index 00000000..8ffe2e55 --- /dev/null +++ b/grape/vertex_map/idxers/hashmap_idxer_view.h @@ -0,0 +1,157 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +#ifndef GRAPE_VERTEX_MAP_IDXERS_HASHMAP_IDXER_VIEW_H_ +#define GRAPE_VERTEX_MAP_IDXERS_HASHMAP_IDXER_VIEW_H_ + +#include "grape/graph/id_indexer.h" +#include "grape/vertex_map/idxers/idxer_base.h" + +namespace grape { + +template +class HashMapIdxerView : public IdxerBase { + using internal_oid_t = typename InternalOID::type; + + public: + HashMapIdxerView() {} + explicit HashMapIdxerView(Array>&& buf) + : buffer_(std::move(buf)) { + indexer_.Init(buffer_.data(), buffer_.size()); + } + ~HashMapIdxerView() {} + + bool get_key(VID_T vid, internal_oid_t& oid) const override { + return indexer_.get_key(vid, oid); + } + + bool get_index(const internal_oid_t& oid, VID_T& vid) const override { + return indexer_.get_index(oid, vid); + } + + IdxerType type() const override { return IdxerType::kHashMapIdxerView; } + + void serialize(std::unique_ptr& writer) override { + size_t size = buffer_.size(); + writer->Write(&size, sizeof(size_t)); + if (size > 0) { + writer->Write(buffer_.data(), size); + } + } + + void deserialize(std::unique_ptr& reader) override { + size_t size; + CHECK(reader->Read(&size, sizeof(size_t))); + if (size > 0) { + buffer_.resize(size); + CHECK(reader->Read(buffer_.data(), size)); + indexer_.Init(buffer_.data(), size); + } + } + + size_t size() const override { return indexer_.size(); } + + size_t memory_usage() const override { return buffer_.size(); } + + private: + IdIndexerView indexer_; + Array> buffer_; +}; + +template +class HashMapIdxerViewDummyBuilder : public IdxerBuilderBase { + public: + using internal_oid_t = typename InternalOID::type; + 
void add(const internal_oid_t& oid) override {} + + std::unique_ptr> finish() override { + return std::unique_ptr>( + new HashMapIdxerView(std::move(buffer_))); + } + + void sync_request(const CommSpec& comm_spec, int target, int tag) override { + int req_type = 0; + sync_comm::Send(req_type, target, tag, comm_spec.comm()); + sync_comm::Recv(buffer_, target, tag + 1, comm_spec.comm()); + } + void sync_response(const CommSpec& comm_spec, int source, int tag) override { + LOG(ERROR) + << "HashMapIdxerViewDummyBuilder should not be used to sync response"; + } + + private: + Array> buffer_; +}; + +template +class HashMapIdxerViewBuilder : public IdxerBuilderBase { + using internal_oid_t = typename InternalOID::type; + + public: + HashMapIdxerViewBuilder() {} + ~HashMapIdxerViewBuilder() {} + + void add(const internal_oid_t& oid) override { indexer_._add(oid); } + + std::unique_ptr> finish() override { + if (buffer_.empty() && indexer_.size() > 0) { + indexer_.serialize_to_mem(buffer_); + } + Array> buffer; + buffer.resize(buffer_.size()); + memcpy(buffer.data(), buffer_.data(), buffer_.size()); + return std::unique_ptr>( + new HashMapIdxerView(std::move(buffer))); + } + + void sync_request(const CommSpec& comm_spec, int target, int tag) override { + LOG(ERROR) << "HashMapIdxerBuilder should not be used to sync request"; + } + + void sync_response(const CommSpec& comm_spec, int source, int tag) override { + int req_type; + sync_comm::Recv(req_type, source, tag, comm_spec.comm()); + if (req_type == 0) { + // request all + if (buffer_.empty() && indexer_.size() > 0) { + indexer_.serialize_to_mem(buffer_); + } + sync_comm::Send(buffer_, source, tag + 1, comm_spec.comm()); + } else if (req_type == 1) { + // request partial + typename IdIndexer::key_buffer_t keys; + sync_comm::Recv(keys, source, tag, comm_spec.comm()); + std::vector response; + size_t keys_num = keys.size(); + for (size_t i = 0; i < keys_num; ++i) { + VID_T vid; + if (indexer_.get_index(keys.get(i), vid)) { 
+ response.push_back(vid); + } else { + response.push_back(std::numeric_limits::max()); + } + } + sync_comm::Send(response, source, tag + 1, comm_spec.comm()); + } + } + + private: + IdIndexer indexer_; + std::vector buffer_; +}; + +} // namespace grape + +#endif // GRAPE_VERTEX_MAP_IDXERS_HASHMAP_IDXER_VIEW_H_ diff --git a/grape/vertex_map/idxers/idxer_base.h b/grape/vertex_map/idxers/idxer_base.h new file mode 100644 index 00000000..4d3224b2 --- /dev/null +++ b/grape/vertex_map/idxers/idxer_base.h @@ -0,0 +1,105 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +#ifndef GRAPE_VERTEX_MAP_IDXERS_IDXER_BASE_H_ +#define GRAPE_VERTEX_MAP_IDXERS_IDXER_BASE_H_ + +#include "grape/worker/comm_spec.h" + +namespace grape { + +enum class IdxerType { + kHashMapIdxer, + kLocalIdxer, + kPTHashIdxer, + kHashMapIdxerView, + kSortedArrayIdxer, +}; + +template +class IdxerBase { + using internal_oid_t = typename InternalOID::type; + + public: + virtual ~IdxerBase() = default; + + virtual bool get_key(VID_T vid, internal_oid_t& oid) const = 0; + + virtual bool get_index(const internal_oid_t& oid, VID_T& vid) const = 0; + + virtual IdxerType type() const = 0; + + virtual size_t size() const = 0; + + virtual size_t memory_usage() const = 0; + + virtual void serialize(std::unique_ptr& writer) = 0; + virtual void deserialize(std::unique_ptr& reader) = 0; +}; + +template +class IdxerBuilderBase { + using internal_oid_t = typename InternalOID::type; + + public: + virtual ~IdxerBuilderBase() = default; + + virtual void add(const internal_oid_t& oid) = 0; + + virtual std::unique_ptr> finish() = 0; + + virtual void sync_request(const CommSpec& comm_spec, int target, int tag) = 0; + virtual void sync_response(const CommSpec& comm_spec, int source, + int tag) = 0; +}; + +template +void serialize_idxer(std::unique_ptr& writer, + std::unique_ptr>& idxer) { + int type = static_cast(idxer->type()); + writer->Write(&type, sizeof(type)); + idxer->serialize(writer); +} + +} // namespace grape + +namespace std { +inline ostream& operator<<(ostream& os, const grape::IdxerType& type) { + switch (type) { + case grape::IdxerType::kHashMapIdxer: + os << "HashMapIdxer"; + break; + case grape::IdxerType::kLocalIdxer: + os << "LocalIdxer"; + break; + case grape::IdxerType::kPTHashIdxer: + os << "PTHashIdxer"; + break; + case grape::IdxerType::kHashMapIdxerView: + os << "HashMapIdxerView"; + break; + case grape::IdxerType::kSortedArrayIdxer: + os << "SortedArrayIdxer"; + break; + default: + os << "Unknown"; + break; + } + return os; +} + +} // namespace std + 
+#endif // GRAPE_VERTEX_MAP_IDXERS_IDXER_BASE_H_ diff --git a/grape/vertex_map/idxers/idxers.h b/grape/vertex_map/idxers/idxers.h new file mode 100644 index 00000000..94233d02 --- /dev/null +++ b/grape/vertex_map/idxers/idxers.h @@ -0,0 +1,114 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +#ifndef GRAPE_VERTEX_MAP_IDXERS_IDXERS_H_ +#define GRAPE_VERTEX_MAP_IDXERS_IDXERS_H_ + +#include "grape/vertex_map/idxers/hashmap_idxer.h" +#include "grape/vertex_map/idxers/hashmap_idxer_view.h" +#include "grape/vertex_map/idxers/local_idxer.h" +#include "grape/vertex_map/idxers/pthash_idxer.h" +#include "grape/vertex_map/idxers/sorted_array_idxer.h" + +namespace grape { + +template +std::unique_ptr> deserialize_idxer( + std::unique_ptr& reader) { + int type; + reader->Read(&type, sizeof(type)); + IdxerType idxer_type = static_cast(type); + switch (idxer_type) { + case IdxerType::kHashMapIdxer: { + auto idxer = std::unique_ptr>( + new HashMapIdxer()); + idxer->deserialize(reader); + return idxer; + } + case IdxerType::kLocalIdxer: { + auto idxer = std::unique_ptr>( + new LocalIdxer()); + idxer->deserialize(reader); + return idxer; + } + case IdxerType::kHashMapIdxerView: { + auto idxer = std::unique_ptr>( + new HashMapIdxerView()); + idxer->deserialize(reader); + return idxer; + } + case IdxerType::kPTHashIdxer: { + auto idxer = std::unique_ptr>( + new PTHashIdxer()); + idxer->deserialize(reader); + return idxer; + } + case 
IdxerType::kSortedArrayIdxer: { + auto idxer = std::unique_ptr>( + new SortedArrayIdxer()); + idxer->deserialize(reader); + return idxer; + } + default: + return nullptr; + } +} + +template +std::unique_ptr> extend_indexer( + std::unique_ptr>&& input, + const std::vector& id_list, VID_T base) { + if (input->type() == IdxerType::kHashMapIdxer) { + auto casted = std::unique_ptr>( + dynamic_cast*>(input.release())); + for (auto& id : id_list) { + casted->add(id); + } + return casted; + } else if (input->type() == IdxerType::kLocalIdxer) { + auto casted = std::unique_ptr>( + dynamic_cast*>(input.release())); + for (auto& id : id_list) { + casted->add(id, base++); + } + return casted; + } else { + LOG(ERROR) << "Only HashMapIdxer or LocalIdxer can be extended"; + return std::move(input); + } +} + +inline IdxerType parse_idxer_type_name(const std::string& name) { + if (name == "hashmap") { + return IdxerType::kHashMapIdxer; + } else if (name == "local") { + return IdxerType::kLocalIdxer; + } else if (name == "pthash") { + return IdxerType::kPTHashIdxer; + } else if (name == "sorted_array") { + return IdxerType::kSortedArrayIdxer; + } else if (name == "hashmap_view") { + return IdxerType::kHashMapIdxerView; + } else { + LOG(INFO) << "unrecognized idxer type: " << name + << ", use hashmap idxer " + "as default"; + return IdxerType::kHashMapIdxer; + } +} + +} // namespace grape + +#endif // GRAPE_VERTEX_MAP_IDXERS_IDXERS_H_ diff --git a/grape/vertex_map/idxers/local_idxer.h b/grape/vertex_map/idxers/local_idxer.h new file mode 100644 index 00000000..2499c43e --- /dev/null +++ b/grape/vertex_map/idxers/local_idxer.h @@ -0,0 +1,121 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +#ifndef GRAPE_VERTEX_MAP_IDXERS_LOCAL_IDXER_H_ +#define GRAPE_VERTEX_MAP_IDXERS_LOCAL_IDXER_H_ + +#include "grape/vertex_map/idxers/idxer_base.h" + +namespace grape { + +template +class LocalIdxer : public IdxerBase { + using internal_oid_t = typename InternalOID::type; + + public: + LocalIdxer() {} + LocalIdxer(IdIndexer&& oid_indexer, + IdIndexer&& lid_indexer) + : oid_indexer_(std::move(oid_indexer)), + lid_indexer_(std::move(lid_indexer)) {} + + bool get_key(VID_T vid, internal_oid_t& oid) const override { + VID_T idx; + if (lid_indexer_.get_index(vid, idx)) { + return oid_indexer_.get_key(idx, oid); + } else { + return false; + } + } + + bool get_index(const internal_oid_t& oid, VID_T& vid) const override { + VID_T idx; + if (oid_indexer_.get_index(oid, idx)) { + return lid_indexer_.get_key(idx, vid); + } else { + return false; + } + } + + IdxerType type() const override { return IdxerType::kLocalIdxer; } + + void serialize(std::unique_ptr& writer) override { + oid_indexer_.Serialize(writer); + lid_indexer_.Serialize(writer); + } + void deserialize(std::unique_ptr& reader) override { + oid_indexer_.Deserialize(reader); + lid_indexer_.Deserialize(reader); + } + + size_t size() const override { return oid_indexer_.size(); } + + void add(const internal_oid_t& oid, VID_T vid) { + size_t before = oid_indexer_.size(); + oid_indexer_._add(oid); + if (oid_indexer_.size() > before) { + lid_indexer_._add(vid); + } + } + + size_t memory_usage() const override { + return oid_indexer_.memory_usage() + lid_indexer_.memory_usage(); + } + + private: + IdIndexer oid_indexer_; 
// oid -> idx + IdIndexer lid_indexer_; // lid -> idx +}; + +template +class LocalIdxerBuilder : public IdxerBuilderBase { + public: + using internal_oid_t = typename InternalOID::type; + void add(const internal_oid_t& oid) override { oid_indexer_._add(oid); } + + std::unique_ptr> finish() override { + return std::unique_ptr>( + new LocalIdxer(std::move(oid_indexer_), + std::move(lid_indexer_))); + } + + void sync_request(const CommSpec& comm_spec, int target, int tag) override { + int req_type = 1; + sync_comm::Send(req_type, target, tag, comm_spec.comm()); + sync_comm::Send(oid_indexer_.keys(), target, tag, comm_spec.comm()); + std::vector response; + sync_comm::Recv(response, target, tag + 1, comm_spec.comm()); + VID_T sentinel = std::numeric_limits::max(); + for (size_t i = 0; i < oid_indexer_.size(); ++i) { + if (response[i] != std::numeric_limits::max()) { + lid_indexer_._add(response[i]); + } else { + lid_indexer_._add(sentinel); + --sentinel; + } + } + } + void sync_response(const CommSpec& comm_spec, int source, int tag) override { + LOG(ERROR) << "LocalIdxerBuilder should not be used to sync response"; + } + + private: + IdIndexer oid_indexer_; + IdIndexer lid_indexer_; +}; + +} // namespace grape + +#endif // GRAPE_VERTEX_MAP_IDXERS_LOCAL_IDXER_H_ diff --git a/grape/vertex_map/idxers/pthash_idxer.h b/grape/vertex_map/idxers/pthash_idxer.h new file mode 100644 index 00000000..3cd3d4ad --- /dev/null +++ b/grape/vertex_map/idxers/pthash_idxer.h @@ -0,0 +1,186 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +#ifndef GRAPE_VERTEX_MAP_IDXERS_PTHASH_IDXER_H_ +#define GRAPE_VERTEX_MAP_IDXERS_PTHASH_IDXER_H_ + +#include "grape/util.h" +#include "grape/utils/gcontainer.h" +#include "grape/utils/pthash_utils/ph_indexer_view.h" +#include "grape/vertex_map/idxers/idxer_base.h" + +namespace grape { + +template +class PTHashIdxer : public IdxerBase { + using internal_oid_t = typename InternalOID::type; + + public: + PTHashIdxer() {} + explicit PTHashIdxer(Array>&& buf) + : buffer_(std::move(buf)) { + idxer_.init(buffer_.data(), buffer_.size()); + } + ~PTHashIdxer() {} + + void Init(void* buffer, size_t size) { idxer_.init(buffer, size); } + + bool get_key(VID_T vid, internal_oid_t& oid) const override { + return idxer_.get_key(vid, oid); + } + + bool get_index(const internal_oid_t& oid, VID_T& vid) const override { + return idxer_.get_index(oid, vid); + } + + IdxerType type() const override { return IdxerType::kPTHashIdxer; } + + void serialize(std::unique_ptr& writer) override { + idxer_.Serialize(writer); + } + + void deserialize(std::unique_ptr& reader) override { + size_t size; + CHECK(reader->Read(&size, sizeof(size_t))); + if (size > 0) { + buffer_.resize(size); + CHECK(reader->Read(buffer_.data(), size)); + idxer_.init(buffer_.data(), size); + } + } + + size_t size() const override { return idxer_.size(); } + + size_t memory_usage() const override { return buffer_.size(); } + + private: + Array> buffer_; + PHIndexerView idxer_; +}; + +template +class PTHashIdxerDummyBuilder : public IdxerBuilderBase { + public: + using internal_oid_t = typename InternalOID::type; + void add(const internal_oid_t& oid) override {} + + void sync_request(const CommSpec& comm_spec, int target, int tag) override { + sync_comm::Recv(buffer_, target, tag, comm_spec.comm()); + } + + void sync_response(const CommSpec& comm_spec, int source, int tag) override { + LOG(ERROR) << 
"PTHashIdxerDummyBuilder should not be used to sync response"; + } + + std::unique_ptr> finish() override { + return std::unique_ptr>( + new PTHashIdxer(std::move(buffer_))); + } + + private: + Array> buffer_; +}; + +template +class PTHashIdxerBuilder : public IdxerBuilderBase { + using internal_oid_t = typename InternalOID::type; + + public: + PTHashIdxerBuilder() {} + ~PTHashIdxerBuilder() {} + + void add(const internal_oid_t& oid) override { keys_.push_back(OID_T(oid)); } + + void buildPhf() { + if (build_phf_) { + return; + } + DistinctSort(keys_); + SinglePHFView::build(keys_.begin(), keys_.size(), phf_, 1); + std::vector ordered_keys(keys_.size()); + for (auto& key : keys_) { + size_t idx = phf_(key); + ordered_keys[idx] = key; + } + key_buffer_.clear(); + for (auto& key : ordered_keys) { + key_buffer_.push_back(key); + } + build_phf_ = true; + } + + size_t getSerializeSize() { + return phf_.num_bits() / 8 + key_buffer_.dump_size(); + } + + /* + * Finish building the perfect hash index in a allocated buffer. + * After add all keys, call buildPhf to build the perfect hash function. + * And then allocate a buffer with getSerializeSize() bytes. + * Call finishInplace to finish building the index in the buffer. + */ + void finishInplace(void* buffer, size_t size, + PTHashIdxer& idxer) { + external_mem_dumper dumper(reinterpret_cast(buffer), size); + phf_.dump(dumper); + key_buffer_.dump(dumper); + idxer.Init(buffer, size); + } + + /* + * Finish building the perfect hash index in an internal + * buffer(std::vector). After add all keys, call finish to build the + * perfect hash index and serialize it. 
+ */ + std::unique_ptr> finish() override { + buildPhf(); + if (getSerializeSize() != buffer_.size()) { + buffer_.resize(getSerializeSize()); + external_mem_dumper dumper(buffer_.data(), buffer_.size()); + phf_.dump(dumper); + key_buffer_.dump(dumper); + } + return std::unique_ptr>( + new PTHashIdxer(std::move(buffer_))); + } + + void sync_request(const CommSpec& comm_spec, int target, int tag) override { + LOG(ERROR) << "PTHashIdxerBuilder should not be used to sync request"; + } + + void sync_response(const CommSpec& comm_spec, int source, int tag) override { + buildPhf(); + if (getSerializeSize() != buffer_.size()) { + buffer_.resize(getSerializeSize()); + external_mem_dumper dumper(buffer_.data(), buffer_.size()); + phf_.dump(dumper); + key_buffer_.dump(dumper); + } + + sync_comm::Send(buffer_, source, tag, comm_spec.comm()); + } + + private: + std::vector keys_; + id_indexer_impl::KeyBuffer key_buffer_; + pthash::single_phf phf_; + + Array> buffer_; + bool build_phf_ = false; +}; + +} // namespace grape + +#endif // GRAPE_VERTEX_MAP_IDXERS_PTHASH_IDXER_H_ diff --git a/grape/vertex_map/idxers/sorted_array_idxer.h b/grape/vertex_map/idxers/sorted_array_idxer.h new file mode 100644 index 00000000..0eaf48da --- /dev/null +++ b/grape/vertex_map/idxers/sorted_array_idxer.h @@ -0,0 +1,198 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +#ifndef GRAPE_VERTEX_MAP_IDXERS_SORTED_ARRAY_IDXER_H_ +#define GRAPE_VERTEX_MAP_IDXERS_SORTED_ARRAY_IDXER_H_ + +#include "grape/utils/gcontainer.h" +#include "grape/vertex_map/idxers/idxer_base.h" + +namespace grape { + +template +class SortedArrayIdxer : public IdxerBase { + using internal_oid_t = typename InternalOID::type; + + public: + SortedArrayIdxer() {} + explicit SortedArrayIdxer(Array>&& id_list) + : id_list_(std::move(id_list)) {} + ~SortedArrayIdxer() {} + + bool get_key(VID_T vid, internal_oid_t& oid) const override { + if (vid >= id_list_.size()) { + return false; + } + oid = id_list_[vid]; + return true; + } + + bool get_index(const internal_oid_t& oid, VID_T& vid) const override { + auto it = std::lower_bound(id_list_.begin(), id_list_.end(), oid); + if (it == id_list_.end() || *it != oid) { + return false; + } + vid = it - id_list_.begin(); + return true; + } + + IdxerType type() const override { return IdxerType::kSortedArrayIdxer; } + + void serialize(std::unique_ptr& writer) override { + size_t size = id_list_.size(); + writer->Write(&size, sizeof(size_t)); + writer->Write(id_list_.data(), size * sizeof(OID_T)); + } + + void deserialize(std::unique_ptr& reader) override { + size_t size; + reader->Read(&size, sizeof(size_t)); + id_list_.resize(size); + reader->Read(id_list_.data(), size * sizeof(OID_T)); + } + + size_t size() const override { return id_list_.size(); } + + size_t memory_usage() const override { + return id_list_.size() * sizeof(OID_T); + } + + private: + Array> id_list_; +}; + +template +class SortedArrayIdxer + : public IdxerBase { + using internal_oid_t = typename InternalOID::type; + + public: + SortedArrayIdxer() {} + explicit SortedArrayIdxer( + Array>&& id_list) { + for (auto& id : id_list) { + id_list_.emplace_back(id); + } + } + ~SortedArrayIdxer() {} + + bool get_key(VID_T vid, internal_oid_t& oid) const override { + if (vid >= id_list_.size()) { + return false; + } + oid = internal_oid_t(id_list_[vid]); + return 
true; + } + + bool get_index(const internal_oid_t& oid, VID_T& vid) const override { + size_t num = id_list_.size(); + size_t low = 0, high = num - 1; + nonstd::string_view oid_view(oid); + while (low <= high) { + size_t mid = low + (high - low) / 2; + if (id_list_[mid] == oid_view) { + vid = mid; + return true; + } else if (id_list_[mid] < oid_view) { + low = mid + 1; + } else { + high = mid - 1; + } + } + return false; + } + + IdxerType type() const override { return IdxerType::kSortedArrayIdxer; } + + void serialize(std::unique_ptr& writer) override { + id_list_.serialize(writer); + } + + void deserialize(std::unique_ptr& reader) override { + id_list_.deserialize(reader); + } + + size_t size() const override { return id_list_.size(); } + + size_t memory_usage() const override { + return id_list_.content_buffer().size() + + id_list_.offset_buffer().size() * sizeof(size_t); + } + + private: + StringViewVector id_list_; +}; + +template +class SortedArrayIdxerDummyBuilder : public IdxerBuilderBase { + public: + using internal_oid_t = typename InternalOID::type; + void add(const internal_oid_t& oid) override {} + + std::unique_ptr> finish() override { + return std::unique_ptr>( + new SortedArrayIdxer(std::move(id_list_))); + } + + void sync_request(const CommSpec& comm_spec, int target, int tag) override { + sync_comm::Recv(id_list_, target, tag, comm_spec.comm()); + } + + void sync_response(const CommSpec& comm_spec, int source, int tag) override { + LOG(ERROR) << "SortedArrayIdxerDummyBuilder should not be used to sync " + "response"; + } + + private: + Array> id_list_; +}; + +template +class SortedArrayIdxerBuilder : public IdxerBuilderBase { + public: + using internal_oid_t = typename InternalOID::type; + void add(const internal_oid_t& oid) override { keys_.push_back(OID_T(oid)); } + + std::unique_ptr> finish() override { + if (!sorted_) { + DistinctSort(keys_); + sorted_ = true; + } + Array> id_list(keys_.size()); + std::copy(keys_.begin(), keys_.end(), 
id_list.begin()); + return std::unique_ptr>( + new SortedArrayIdxer(std::move(id_list))); + } + + void sync_request(const CommSpec& comm_spec, int target, int tag) override { + LOG(ERROR) << "HashMapIdxerBuilder should not be used to sync request"; + } + + void sync_response(const CommSpec& comm_spec, int source, int tag) override { + if (!sorted_) { + DistinctSort(keys_); + sorted_ = true; + } + sync_comm::Send(keys_, source, tag, comm_spec.comm()); + } + + private: + std::vector keys_; + bool sorted_ = false; +}; + +} // namespace grape + +#endif // GRAPE_VERTEX_MAP_IDXERS_SORTED_ARRAY_IDXER_H_ diff --git a/grape/vertex_map/local_vertex_map.h b/grape/vertex_map/local_vertex_map.h deleted file mode 100644 index fe6878ea..00000000 --- a/grape/vertex_map/local_vertex_map.h +++ /dev/null @@ -1,280 +0,0 @@ -/** Copyright 2020 Alibaba Group Holding Limited. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -#ifndef GRAPE_VERTEX_MAP_LOCAL_VERTEX_MAP_H_ -#define GRAPE_VERTEX_MAP_LOCAL_VERTEX_MAP_H_ - -#include -#include -#include -#include -#include -#include -#include - -#include "grape/config.h" -#include "grape/fragment/partitioner.h" -#include "grape/graph/id_indexer.h" -#include "grape/serialization/in_archive.h" -#include "grape/serialization/out_archive.h" -#include "grape/vertex_map/vertex_map_base.h" -#include "grape/worker/comm_spec.h" - -namespace grape { - -template -class LocalVertexMap; - -template -class LocalVertexMapBuilder { - using internal_oid_t = typename InternalOID::type; - - private: - LocalVertexMapBuilder( - fid_t fid, std::vector>& oid_to_index, - std::vector>& gid_to_index, - const PARTITIONER_T& partitioner, const IdParser& id_parser) - : fid_(fid), - oid_to_index_(oid_to_index), - gid_to_index_(gid_to_index), - partitioner_(partitioner), - id_parser_(id_parser) {} - - public: - ~LocalVertexMapBuilder() {} - - void add_local_vertex(const internal_oid_t& id, VID_T& gid) { - assert(partitioner_.GetPartitionId(id) == fid_); - oid_to_index_[fid_].add(id, gid); - gid = id_parser_.generate_global_id(fid_, gid); - } - - void add_vertex(const internal_oid_t& id) { - fid_t fid = partitioner_.GetPartitionId(id); - oid_to_index_[fid]._add(id); - } - - void finish(LocalVertexMap& vertex_map) { - const CommSpec& comm_spec = vertex_map.GetCommSpec(); - int worker_id = comm_spec.worker_id(); - int worker_num = comm_spec.worker_num(); - std::thread request_thread([&]() { - for (int i = 1; i < worker_num; ++i) { - int dst_worker_id = (worker_id + i) % worker_num; - auto& indexer = oid_to_index_[comm_spec.WorkerToFrag(dst_worker_id)]; - sync_comm::Send(indexer.keys(), dst_worker_id, 0, comm_spec.comm()); - std::vector gid_list(indexer.size()); - sync_comm::Recv(gid_list, dst_worker_id, 1, comm_spec.comm()); - auto& gid_indexer = - gid_to_index_[comm_spec.WorkerToFrag(dst_worker_id)]; - for (auto gid : gid_list) { - gid_indexer._add(gid); - } - } - }); 
- std::thread response_thread([&]() { - for (int i = 1; i < worker_num; ++i) { - int src_worker_id = (worker_id + worker_num - i) % worker_num; - typename IdIndexer::key_buffer_t keys; - sync_comm::Recv(keys, src_worker_id, 0, comm_spec.comm()); - std::vector gid_list(keys.size()); - VID_T gid; - auto& native_indexer = oid_to_index_[fid_]; - for (size_t k = 0; k < keys.size(); ++k) { - CHECK(native_indexer.get_index(keys[k], gid)); - gid = id_parser_.generate_global_id(fid_, gid); - gid_list[k] = gid; - } - sync_comm::Send(gid_list, src_worker_id, 1, comm_spec.comm()); - } - }); - - request_thread.join(); - response_thread.join(); - MPI_Barrier(comm_spec.comm()); - - vertex_map.vertices_num_.resize(comm_spec.fnum()); - vertex_map.vertices_num_[fid_] = oid_to_index_[fid_].size(); - sync_comm::AllGather(vertex_map.vertices_num_, comm_spec.comm()); - } - - private: - template - friend class LocalVertexMap; - - fid_t fid_; - std::vector>& oid_to_index_; - std::vector>& gid_to_index_; - const PARTITIONER_T& partitioner_; - const IdParser id_parser_; -}; - -template > -class LocalVertexMap : public VertexMapBase { - using base_t = VertexMapBase; - using internal_oid_t = typename InternalOID::type; - - public: - explicit LocalVertexMap(const CommSpec& comm_spec) : base_t(comm_spec) {} - ~LocalVertexMap() = default; - void Init() { - oid_to_index_.resize(comm_spec_.fnum()); - gid_to_index_.resize(comm_spec_.fnum()); - } - - size_t GetTotalVertexSize() const { - size_t size = 0; - for (auto v : vertices_num_) { - size += v; - } - return size; - } - - size_t GetInnerVertexSize(fid_t fid) const { return vertices_num_[fid]; } - void AddVertex(const OID_T& oid) { LOG(FATAL) << "not implemented"; } - - using base_t::Lid2Gid; - bool AddVertex(const OID_T& oid, VID_T& gid) { - LOG(FATAL) << "not implemented"; - return false; - } - - using base_t::GetFidFromGid; - using base_t::GetLidFromGid; - bool GetOid(const VID_T& gid, OID_T& oid) const { - fid_t fid = GetFidFromGid(gid); - 
return GetOid(fid, id_parser_.get_local_id(gid), oid); - } - - bool GetOid(fid_t fid, const VID_T& lid, OID_T& oid) const { - internal_oid_t internal_oid; - if (fid == comm_spec_.fid()) { - if (oid_to_index_[fid].get_key(lid, internal_oid)) { - oid = InternalOID::FromInternal(internal_oid); - return true; - } - } else { - VID_T index; - if (gid_to_index_[fid].get_index(id_parser_.generate_global_id(fid, lid), - index)) { - if (oid_to_index_[fid].get_key(index, internal_oid)) { - oid = InternalOID::FromInternal(internal_oid); - return true; - } - } - } - return false; - } - - bool GetGid(fid_t fid, const OID_T& oid, VID_T& gid) const { - internal_oid_t internal_oid(oid); - return _GetGid(fid, internal_oid, gid); - } - - bool _GetGid(fid_t fid, const internal_oid_t& oid, VID_T& gid) const { - VID_T index; - if (fid == comm_spec_.fid()) { - if (oid_to_index_[fid].get_index(oid, index)) { - gid = id_parser_.generate_global_id(fid, index); - return true; - } - } else { - if (oid_to_index_[fid].get_index(oid, index)) { - return gid_to_index_[fid].get_key(index, gid); - } - } - return false; - } - - bool GetGid(const OID_T& oid, VID_T& gid) const { - fid_t fid = partitioner_.GetPartitionId(oid); - return GetGid(fid, oid, gid); - } - - bool _GetGid(const internal_oid_t& oid, VID_T& gid) const { - fid_t fid = partitioner_.GetPartitionId(oid); - return _GetGid(fid, oid, gid); - } - - LocalVertexMapBuilder GetLocalBuilder() { - fid_t fid = comm_spec_.fid(); - return LocalVertexMapBuilder( - fid, oid_to_index_, gid_to_index_, partitioner_, id_parser_); - } - - template - void Serialize(const std::string& prefix) { - char fbuf[1024]; - snprintf(fbuf, sizeof(fbuf), "%s/%s_%d", prefix.c_str(), - kSerializationVertexMapFilename, comm_spec_.fid()); - - auto io_adaptor = - std::unique_ptr(new IOADAPTOR_T(std::string(fbuf))); - io_adaptor->Open("wb"); - - base_t::serialize(io_adaptor); - for (auto& indexer : oid_to_index_) { - indexer.Serialize(io_adaptor); - } - for (auto& indexer : 
gid_to_index_) { - indexer.Serialize(io_adaptor); - } - io_adaptor->Close(); - } - - template - void Deserialize(const std::string& prefix, fid_t fid) { - char fbuf[1024]; - snprintf(fbuf, sizeof(fbuf), "%s/%s_%d", prefix.c_str(), - kSerializationVertexMapFilename, fid); - - auto io_adaptor = - std::unique_ptr(new IOADAPTOR_T(std::string(fbuf))); - io_adaptor->Open(); - - base_t::deserialize(io_adaptor); - oid_to_index_.resize(comm_spec_.fnum()); - for (auto& indexer : oid_to_index_) { - indexer.Deserialize(io_adaptor); - } - gid_to_index_.resize(comm_spec_.fnum()); - for (auto& indexer : gid_to_index_) { - indexer.Deserialize(io_adaptor); - } - io_adaptor->Close(); - } - - void UpdateToBalance(std::vector& vnum_list, - std::vector>& gid_maps) { - LOG(FATAL) << "not implemented"; - } - - private: - template - friend class LocalVertexMapBuilder; - - std::vector> oid_to_index_; - std::vector> gid_to_index_; - using base_t::comm_spec_; - using base_t::id_parser_; - using base_t::partitioner_; - - std::vector vertices_num_; -}; - -} // namespace grape - -#endif // GRAPE_VERTEX_MAP_LOCAL_VERTEX_MAP_H_ diff --git a/grape/vertex_map/partitioner.h b/grape/vertex_map/partitioner.h new file mode 100644 index 00000000..82a3cb50 --- /dev/null +++ b/grape/vertex_map/partitioner.h @@ -0,0 +1,292 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +#ifndef GRAPE_VERTEX_MAP_PARTITIONER_H_ +#define GRAPE_VERTEX_MAP_PARTITIONER_H_ + +#include +#include "grape/io/io_adaptor_base.h" + +namespace grape { + +enum class PartitionerType { + kHashPartitioner, + kMapPartitioner, + kSegmentedPartitioner, +}; + +inline PartitionerType parse_partitioner_type_name(const std::string& name) { + if (name == "hash") { + return PartitionerType::kHashPartitioner; + } else if (name == "map") { + return PartitionerType::kMapPartitioner; + } else if (name == "segment") { + return PartitionerType::kSegmentedPartitioner; + } else { + LOG(ERROR) << "unrecognized partitioner: " << name + << ", use map partitioner as default"; + return PartitionerType::kMapPartitioner; + } +} + +template +class IPartitioner { + public: + using internal_oid_t = typename InternalOID::type; + + virtual ~IPartitioner() = default; + + virtual fid_t GetPartitionId(const internal_oid_t& oid) const = 0; + + virtual void SetPartitionId(const internal_oid_t& oid, fid_t fid) = 0; + + virtual void serialize(std::unique_ptr& writer) = 0; + + virtual void deserialize(std::unique_ptr& reader) = 0; + + virtual size_t memory_usage() const = 0; + + virtual PartitionerType type() const = 0; +}; + +template > +class HashPartitioner : public IPartitioner { + public: + using internal_oid_t = typename InternalOID::type; + + HashPartitioner() : hash_(), fnum_(1) {} + explicit HashPartitioner(size_t frag_num) : hash_(), fnum_(frag_num) {} + + fid_t GetPartitionId(const internal_oid_t& oid) const override { + return static_cast(hash_(OID_T(oid)) % fnum_); + } + + void SetPartitionId(const internal_oid_t& oid, fid_t fid) override { + if (GetPartitionId(oid) != fid) { + LOG(ERROR) << "HashPartitioner cannot set partition id"; + } + } + + void serialize(std::unique_ptr& writer) override { + CHECK(writer->Write(&fnum_, sizeof(fid_t))); + } + + void deserialize(std::unique_ptr& reader) override { + CHECK(reader->Read(&fnum_, sizeof(fid_t))); + } + + PartitionerType type() const 
override { + return PartitionerType::kHashPartitioner; + } + + size_t memory_usage() const override { return 0; } + + private: + HASH_T hash_; + fid_t fnum_; +}; + +template +class MapPartitioner : public IPartitioner { + public: + using internal_oid_t = typename InternalOID::type; + + MapPartitioner() : fnum_(0) {} + explicit MapPartitioner(fid_t fnum) : fnum_(fnum) {} + MapPartitioner(fid_t fnum, const std::vector& oid_list) { + fnum_ = fnum; + Init(fnum, oid_list); + } + ~MapPartitioner() = default; + + void Init(fid_t fnum, const std::vector& oid_list) { + fnum_ = fnum; + size_t frag_num = fnum; + size_t vnum = oid_list.size(); + size_t frag_vnum = (vnum + frag_num - 1) / frag_num; + o2f_.clear(); + o2f_.reserve(vnum); + for (size_t i = 0; i < vnum; ++i) { + fid_t fid = static_cast(i / frag_vnum); + o2f_.emplace(oid_list[i], fid); + } + } + + void Init(const std::vector>& oid_lists) { + size_t frag_num = oid_lists.size(); + fnum_ = frag_num; + o2f_.clear(); + for (size_t i = 0; i < frag_num; ++i) { + for (const auto& oid : oid_lists[i]) { + o2f_.emplace(oid, i); + } + } + } + + fid_t GetPartitionId(const internal_oid_t& oid) const override { + auto iter = o2f_.find(OID_T(oid)); + if (iter == o2f_.end()) { + return fnum_; + } + return iter->second; + } + + void SetPartitionId(const internal_oid_t& oid, fid_t fid) override { + o2f_[OID_T(oid)] = fid; + } + + void serialize(std::unique_ptr& writer) override { + InArchive arc; + arc << fnum_ << o2f_; + CHECK(writer->WriteArchive(arc)); + } + + void deserialize(std::unique_ptr& reader) override { + OutArchive arc; + CHECK(reader->ReadArchive(arc)); + arc >> fnum_ >> o2f_; + } + + PartitionerType type() const override { + return PartitionerType::kMapPartitioner; + } + + size_t memory_usage() const override { return o2f_.memory_usage(); } + + private: + fid_t fnum_; + ska::flat_hash_map o2f_; +}; + +template +class SegmentedPartitioner : public IPartitioner { + using internal_oid_t = typename InternalOID::type; + + 
public: + SegmentedPartitioner() : fnum_(0) {} + SegmentedPartitioner(fid_t fnum, const std::vector& sorted_oid_list) { + fnum_ = fnum; + size_t part_size = (sorted_oid_list.size() + fnum - 1) / fnum; + for (size_t i = 1; i < fnum; ++i) { + boundaries_.emplace_back(sorted_oid_list[i * part_size]); + } + } + explicit SegmentedPartitioner(const std::vector& boundaries) + : fnum_(boundaries.size() + 1), boundaries_(boundaries) {} + ~SegmentedPartitioner() = default; + + void Init(fid_t fnum, const std::vector& boundaries) { + fnum_ = fnum; + boundaries_ = boundaries; + CHECK_EQ(fnum_, boundaries_.size() + 1); + } + + fid_t GetPartitionId(const internal_oid_t& oid) const override { + auto iter = + std::upper_bound(boundaries_.begin(), boundaries_.end(), OID_T(oid)); + return static_cast(iter - boundaries_.begin()); + } + + void SetPartitionId(const internal_oid_t& oid, fid_t fid) override { + LOG(FATAL) << "SegmentedPartitioner cannot set partition id"; + } + + PartitionerType type() const override { + return PartitionerType::kSegmentedPartitioner; + } + + void serialize(std::unique_ptr& writer) override { + InArchive arc; + arc << fnum_ << boundaries_; + CHECK(writer->WriteArchive(arc)); + } + + void deserialize(std::unique_ptr& reader) override { + OutArchive arc; + CHECK(reader->ReadArchive(arc)); + arc >> fnum_ >> boundaries_; + } + + size_t memory_usage() const override { + return boundaries_.size() * sizeof(OID_T); + } + + private: + fid_t fnum_; + std::vector boundaries_; +}; + +template +void serialize_partitioner(std::unique_ptr& writer, + std::unique_ptr>& partitioner) { + int type = static_cast(partitioner->type()); + writer->Write(&type, sizeof(type)); + partitioner->serialize(writer); +} + +template +std::unique_ptr> deserialize_partitioner( + std::unique_ptr& reader) { + int type; + reader->Read(&type, sizeof(type)); + std::unique_ptr> partitioner(nullptr); + switch (static_cast(type)) { + case PartitionerType::kHashPartitioner: + partitioner = + 
std::unique_ptr>(new HashPartitioner()); + break; + case PartitionerType::kMapPartitioner: + partitioner = + std::unique_ptr>(new MapPartitioner()); + break; + case PartitionerType::kSegmentedPartitioner: + partitioner = + std::unique_ptr>(new SegmentedPartitioner()); + break; + default: + LOG(FATAL) << "Unknown partitioner type"; + } + if (partitioner) { + partitioner->deserialize(reader); + } + return partitioner; +} + +} // namespace grape + +#include + +namespace std { + +inline ostream& operator<<(ostream& os, const grape::PartitionerType& type) { + switch (type) { + case grape::PartitionerType::kHashPartitioner: + os << "hash"; + break; + case grape::PartitionerType::kMapPartitioner: + os << "map"; + break; + case grape::PartitionerType::kSegmentedPartitioner: + os << "segment"; + break; + default: + os << "unknown"; + } + return os; +} + +} // namespace std + +#endif // GRAPE_VERTEX_MAP_PARTITIONER_H_ diff --git a/grape/vertex_map/vertex_map.h b/grape/vertex_map/vertex_map.h new file mode 100644 index 00000000..a1c385bd --- /dev/null +++ b/grape/vertex_map/vertex_map.h @@ -0,0 +1,525 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +#ifndef GRAPE_VERTEX_MAP_VERTEX_MAP_H_ +#define GRAPE_VERTEX_MAP_VERTEX_MAP_H_ + +#include + +#include "grape/fragment/id_parser.h" +#include "grape/util.h" +#include "grape/vertex_map/idxers/idxers.h" +#include "grape/vertex_map/partitioner.h" + +namespace grape { + +template +class VertexMapBuilder; + +template +class VertexMap { + public: + using oid_t = OID_T; + using vid_t = VID_T; + using internal_oid_t = typename InternalOID::type; + + VertexMap(const VertexMap&) = delete; + VertexMap() : partitioner_(nullptr) {} + ~VertexMap() {} + + fid_t GetFragmentNum() const { return fnum_; } + + fid_t GetFragmentId(const OID_T& oid) const { + internal_oid_t internal_oid(oid); + return partitioner_->GetPartitionId(internal_oid); + } + + const IdParser& GetIdParser() const { return id_parser_; } + + const IPartitioner& GetPartitioner() const { return *partitioner_; } + + VID_T Lid2Gid(fid_t fid, const VID_T& lid) const { + return id_parser_.generate_global_id(fid, lid); + } + + fid_t GetFidFromGid(const VID_T& gid) const { + return id_parser_.get_fragment_id(gid); + } + + VID_T GetLidFromGid(const VID_T& gid) const { + return id_parser_.get_local_id(gid); + } + + VID_T MaxVertexNum() const { return id_parser_.max_local_id(); } + + size_t GetTotalVertexSize() const { return total_vertex_size_; } + + size_t GetInnerVertexSize(fid_t fid) const { return inner_vertex_size_[fid]; } + + void UpdateToBalance(const CommSpec& comm_spec, + const std::vector& vnum_list, + const std::vector>& gid_maps); + + bool GetOid(const VID_T& gid, OID_T& oid) const { + fid_t fid = GetFidFromGid(gid); + return GetOid(fid, GetLidFromGid(gid), oid); + } + + bool GetOid(fid_t fid, const VID_T& lid, OID_T& oid) const { + internal_oid_t internal_oid; + if (fid >= fnum_) { + return false; + } + if (idxers_[fid]->get_key(lid, internal_oid)) { + oid = InternalOID::FromInternal(internal_oid); + return true; + } + return false; + } + + bool GetGid(fid_t fid, const OID_T& oid, VID_T& gid) const { + 
internal_oid_t internal_oid(oid); + if (fid >= fnum_) { + return false; + } + if (idxers_[fid]->get_index(internal_oid, gid)) { + gid = Lid2Gid(fid, gid); + return true; + } + return false; + } + + bool GetGid(const OID_T& oid, VID_T& gid) const { + fid_t fid = partitioner_->GetPartitionId(oid); + if (fid == fnum_) { + return false; + } + return GetGid(fid, oid, gid); + } + + void reset() { idxers_.clear(); } + + void ExtendVertices(const CommSpec& comm_spec, + std::vector&& local_vertices_to_add) { + int worker_id = comm_spec.worker_id(); + DistinctSort(local_vertices_to_add); + bool unpartitioned_id = false; + for (size_t i = 0; i < local_vertices_to_add.size();) { + fid_t fid = partitioner_->GetPartitionId(local_vertices_to_add[i]); + if (fid == fnum_) { + unpartitioned_id = true; + } else if (comm_spec.FragToWorker(fid) != worker_id) { + LOG(ERROR) << "Partition id is not consistent for vertex - " + << local_vertices_to_add[i] << ", discarded..."; + std::swap(local_vertices_to_add[i], local_vertices_to_add.back()); + local_vertices_to_add.pop_back(); + continue; + } else { + vid_t index; + if (idxers_[fid]->get_index(internal_oid_t(local_vertices_to_add[i]), + index)) { + LOG(ERROR) << "Vertex already exists - " << local_vertices_to_add[i]; + std::swap(local_vertices_to_add[i], local_vertices_to_add.back()); + local_vertices_to_add.pop_back(); + continue; + } + } + ++i; + } + int state = 0; + if (unpartitioned_id) { + state = 1; + } + std::vector states(comm_spec.fnum(), 0); + states[worker_id] = state; + sync_comm::AllGather(states, comm_spec.comm()); + // need to update partitioner with new vertices + std::vector> global_vertices_to_add(comm_spec.fnum()); + global_vertices_to_add[comm_spec.fid()] = std::move(local_vertices_to_add); + sync_comm::AllGather(global_vertices_to_add, comm_spec.comm()); + for (fid_t fid = 0; fid < fnum_; ++fid) { + if (states[fid] == 1) { + CHECK(partitioner_->type() == PartitionerType::kMapPartitioner); + for (auto& v : 
global_vertices_to_add[fid]) { + partitioner_->SetPartitionId(v, fid); + } + } + idxers_[fid] = + extend_indexer(std::move(idxers_[fid]), global_vertices_to_add[fid], + static_cast(inner_vertex_size_[fid])); + inner_vertex_size_[fid] += global_vertices_to_add[fid].size(); + total_vertex_size_ += global_vertices_to_add[fid].size(); + } + } + + template + void Serialize(const std::string& prefix, const CommSpec& comm_spec) { + if (idxer_type_ != IdxerType::kLocalIdxer) { + char fbuf[1024]; + snprintf(fbuf, sizeof(fbuf), "%s/%s", prefix.c_str(), + kSerializationVertexMapFilename); + std::string path = std::string(fbuf); + if (comm_spec.worker_id() == 0) { + serialize_impl(path); + } + MPI_Barrier(comm_spec.comm()); + if (!exists_file(path) && comm_spec.local_id() == 0) { + serialize_impl(path); + } + MPI_Barrier(comm_spec.comm()); + if (!exists_file(path)) { + serialize_impl(path); + } + } else { + char fbuf[1024]; + snprintf(fbuf, sizeof(fbuf), "%s/%s_%d", prefix.c_str(), + kSerializationVertexMapFilename, comm_spec.fid()); + serialize_impl(std::string(fbuf)); + } + } + + template + void Deserialize(const std::string& prefix, const CommSpec& comm_spec) { + char local_fbuf[1024]; + snprintf(local_fbuf, sizeof(local_fbuf), "%s/%s_%d", prefix.c_str(), + kSerializationVertexMapFilename, comm_spec.fid()); + if (exists_file(local_fbuf)) { + deserialize_impl(std::string(local_fbuf)); + } else { + char global_fbuf[1024]; + snprintf(global_fbuf, sizeof(global_fbuf), "%s/%s", prefix.c_str(), + kSerializationVertexMapFilename); + if (exists_file(global_fbuf)) { + deserialize_impl(std::string(global_fbuf)); + } else { + LOG(FATAL) << "Cannot find vertex map file."; + } + } + + id_parser_.init(fnum_); + } + + VertexMap& operator=(VertexMap&& other) { + if (this == &other) { + return *this; + } + + this->fid_ = other.fid_; + this->fnum_ = other.fnum_; + this->idxer_type_ = other.idxer_type_; + this->total_vertex_size_ = other.total_vertex_size_; + this->inner_vertex_size_ = 
std::move(other.inner_vertex_size_); + + this->idxers_ = std::move(other.idxers_); + this->partitioner_ = std::move(other.partitioner_); + this->id_parser_.init(fnum_); + + other.idxers_.clear(); + other.total_vertex_size_ = 0; + other.inner_vertex_size_.clear(); + + return *this; + } + + PartitionerType partitioner_type() const { return partitioner_->type(); } + IdxerType idxer_type() const { return idxer_type_; } + + private: + template + void serialize_impl(const std::string& path) { + auto io_adaptor = std::unique_ptr(new IOADAPTOR(path)); + io_adaptor->Open("wb"); + InArchive arc; + arc << fid_ << fnum_ << idxer_type_ << total_vertex_size_ + << inner_vertex_size_; + io_adaptor->WriteArchive(arc); + for (fid_t fid = 0; fid < fnum_; ++fid) { + serialize_idxer(io_adaptor, idxers_[fid]); + } + serialize_partitioner(io_adaptor, partitioner_); + } + + template + void deserialize_impl(const std::string& path) { + auto io_adaptor = std::unique_ptr(new IOADAPTOR(path)); + io_adaptor->Open(); + OutArchive arc; + io_adaptor->ReadArchive(arc); + arc >> fid_ >> fnum_ >> idxer_type_ >> total_vertex_size_ >> + inner_vertex_size_; + for (fid_t fid = 0; fid < fnum_; ++fid) { + idxers_.emplace_back(deserialize_idxer(io_adaptor)); + } + partitioner_ = deserialize_partitioner(io_adaptor); + } + + template + friend class VertexMapBuilder; + + fid_t fid_; + fid_t fnum_; + + IdxerType idxer_type_; + + size_t total_vertex_size_; + std::vector inner_vertex_size_; + + std::unique_ptr> partitioner_; + std::vector>> idxers_; + IdParser id_parser_; +}; + +template +class VertexMapBuilder { + using internal_oid_t = typename InternalOID::type; + + public: + VertexMapBuilder(fid_t fid, fid_t fnum, + std::unique_ptr>&& partitioner, + IdxerType idxer_type) + : fid_(fid), + fnum_(fnum), + idxer_type_(idxer_type), + partitioner_(std::move(partitioner)) { + if (idxer_type_ == IdxerType::kSortedArrayIdxer) { + for (fid_t i = 0; i < fnum; ++i) { + if (i != fid) { + idxer_builders_.emplace_back( + 
new SortedArrayIdxerDummyBuilder()); + } else { + idxer_builders_.emplace_back( + new SortedArrayIdxerBuilder()); + } + } + } else if (idxer_type_ == IdxerType::kHashMapIdxer) { + for (fid_t i = 0; i < fnum; ++i) { + if (i != fid) { + idxer_builders_.emplace_back( + new HashMapIdxerDummyBuilder()); + } else { + idxer_builders_.emplace_back(new HashMapIdxerBuilder()); + } + } + } else if (idxer_type_ == IdxerType::kPTHashIdxer) { + for (fid_t i = 0; i < fnum; ++i) { + if (i != fid) { + idxer_builders_.emplace_back( + new PTHashIdxerDummyBuilder()); + } else { + idxer_builders_.emplace_back(new PTHashIdxerBuilder()); + } + } + } else if (idxer_type_ == IdxerType::kLocalIdxer) { + for (fid_t i = 0; i < fnum; ++i) { + if (i != fid) { + idxer_builders_.emplace_back(new LocalIdxerBuilder()); + } else { + idxer_builders_.emplace_back(new HashMapIdxerBuilder()); + } + } + } else if (idxer_type == IdxerType::kHashMapIdxerView) { + for (fid_t i = 0; i < fnum; ++i) { + if (i != fid) { + idxer_builders_.emplace_back( + new HashMapIdxerViewDummyBuilder()); + } else { + idxer_builders_.emplace_back( + new HashMapIdxerViewBuilder()); + } + } + } else { + LOG(FATAL) << "Unknown idxer type"; + } + } + + ~VertexMapBuilder() {} + + fid_t get_fragment_id(const internal_oid_t& oid) const { + return partitioner_->GetPartitionId(oid); + } + + void add_vertex(const internal_oid_t& id) { + fid_t fid = partitioner_->GetPartitionId(id); + if (fid < fnum_) { + idxer_builders_[fid]->add(id); + } else { + LOG(ERROR) << "add vertex - " << id << " failed, unknwon partition id"; + } + } + + void finish(const CommSpec& comm_spec, VertexMap& vertex_map) { + int worker_id = comm_spec.worker_id(); + int worker_num = comm_spec.worker_num(); + fid_t fnum = comm_spec.fnum(); + { + std::thread response_thread = std::thread([&]() { + int dst_worker_id = (worker_id + worker_num - 1) % worker_num; + while (dst_worker_id != worker_id) { + for (fid_t fid = 0; fid < fnum; ++fid) { + if 
(comm_spec.FragToWorker(fid) != worker_id) { + continue; + } + idxer_builders_[fid]->sync_response(comm_spec, dst_worker_id, 0); + } + dst_worker_id = (dst_worker_id + worker_num - 1) % worker_num; + } + }); + std::thread request_thread = std::thread([&]() { + int src_worker_id = (worker_id + 1) % worker_num; + while (src_worker_id != worker_id) { + for (fid_t fid = 0; fid < fnum; ++fid) { + if (comm_spec.FragToWorker(fid) != src_worker_id) { + continue; + } + idxer_builders_[fid]->sync_request(comm_spec, src_worker_id, 0); + } + src_worker_id = (src_worker_id + 1) % worker_num; + } + }); + + request_thread.join(); + response_thread.join(); + MPI_Barrier(comm_spec.comm()); + } + + vertex_map.reset(); + vertex_map.fid_ = fid_; + vertex_map.fnum_ = fnum; + vertex_map.idxer_type_ = idxer_type_; + vertex_map.partitioner_ = std::move(partitioner_); + for (fid_t fid = 0; fid < fnum; ++fid) { + vertex_map.idxers_.emplace_back(idxer_builders_[fid]->finish()); + } + idxer_builders_.clear(); + vertex_map.id_parser_.init(fnum); + + vertex_map.inner_vertex_size_.resize(fnum, 0); + vertex_map.inner_vertex_size_[fid_] = vertex_map.idxers_[fid_]->size(); + + sync_comm::AllGather(vertex_map.inner_vertex_size_, comm_spec.comm()); + + size_t total = 0; + for (fid_t i = 0; i < fnum; ++i) { + total += vertex_map.inner_vertex_size_[i]; + } + vertex_map.total_vertex_size_ = total; + } + + private: + fid_t fid_; + fid_t fnum_; + IdxerType idxer_type_; + std::unique_ptr> partitioner_; + std::vector>> idxer_builders_; +}; + +template +void VertexMap::UpdateToBalance( + const CommSpec& comm_spec, const std::vector& vnum_list, + const std::vector>& gid_maps) { + fid_t fnum = comm_spec.fnum(); + std::vector> oid_lists(fnum); + std::vector> unresolved_lids(fnum); + std::vector>> unresolved_vertices(fnum); + std::vector> unresolved_oids(fnum); + for (fid_t fid = 0; fid < fnum; ++fid) { + VID_T num = inner_vertex_size_[fid]; + CHECK_EQ(num, gid_maps[fid].size()); + for (VID_T lid = 0; lid < num; 
++lid) { + VID_T new_gid = gid_maps[fid][lid]; + internal_oid_t oid; + fid_t new_fid = GetFidFromGid(new_gid); + VID_T new_lid = GetLidFromGid(new_gid); + if (!idxers_[fid]->get_key(lid, oid)) { + unresolved_lids[fid].push_back(lid); + unresolved_vertices[fid].push_back(std::make_pair(new_fid, new_lid)); + } else { + if (oid_lists[new_fid].size() <= new_lid) { + oid_lists[new_fid].resize(new_lid + 1); + } + oid_lists[new_fid][new_lid] = oid_t(oid); + } + } + } + { + std::thread request_thread = std::thread([&]() { + int src_worker_id = (comm_spec.worker_id() + 1) % comm_spec.worker_num(); + while (src_worker_id != comm_spec.worker_id()) { + for (fid_t fid = 0; fid < fnum; ++fid) { + if (comm_spec.FragToWorker(fid) != src_worker_id) { + continue; + } + sync_comm::Send(unresolved_lids[fid], src_worker_id, 0, + comm_spec.comm()); + sync_comm::Recv(unresolved_oids[fid], src_worker_id, 1, + comm_spec.comm()); + } + src_worker_id = (src_worker_id + 1) % comm_spec.worker_num(); + } + }); + std::thread response_thread = std::thread([&]() { + int dst_worker_id = (comm_spec.worker_id() + comm_spec.worker_num() - 1) % + comm_spec.worker_num(); + while (dst_worker_id != comm_spec.worker_id()) { + for (fid_t fid = 0; fid < fnum; ++fid) { + if (comm_spec.FragToWorker(fid) != comm_spec.worker_id()) { + continue; + } + std::vector lid_list; + sync_comm::Recv(lid_list, dst_worker_id, 0, comm_spec.comm()); + std::vector oid_list; + for (auto lid : lid_list) { + OID_T oid{}; + if (!GetOid(fid, lid, oid)) { + LOG(ERROR) << "Cannot find oid for lid " << lid; + } + oid_list.push_back(oid); + } + sync_comm::Send(oid_list, dst_worker_id, 1, comm_spec.comm()); + } + dst_worker_id = (dst_worker_id + comm_spec.worker_num() - 1) % + comm_spec.worker_num(); + } + }); + response_thread.join(); + request_thread.join(); + MPI_Barrier(comm_spec.comm()); + } + for (fid_t fid = 0; fid < fnum; ++fid) { + for (size_t i = 0; i < unresolved_lids[fid].size(); ++i) { + OID_T oid = unresolved_oids[fid][i]; 
+ const auto& pair = unresolved_vertices[fid][i]; + oid_lists[pair.first][pair.second] = oid; + } + } + + std::unique_ptr> new_partitioner( + new MapPartitioner(fnum_)); + new_partitioner->Init(oid_lists); + + VertexMapBuilder builder( + comm_spec.fid(), comm_spec.fnum(), std::move(new_partitioner), true, + idxers_[0]->type() == IdxerType::kPTHashIdxer); + for (auto& oid : oid_lists[comm_spec.fid()]) { + internal_oid_t internal_oid(oid); + builder.add_vertex(internal_oid); + } + + builder.finish(comm_spec, *this); +} + +} // namespace grape + +#endif // GRAPE_VERTEX_MAP_VERTEX_MAP_H_ diff --git a/grape/vertex_map/vertex_map_base.h b/grape/vertex_map/vertex_map_base.h deleted file mode 100644 index 3ba30bcb..00000000 --- a/grape/vertex_map/vertex_map_base.h +++ /dev/null @@ -1,147 +0,0 @@ -/** Copyright 2020 Alibaba Group Holding Limited. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -#ifndef GRAPE_VERTEX_MAP_VERTEX_MAP_BASE_H_ -#define GRAPE_VERTEX_MAP_VERTEX_MAP_BASE_H_ - -#include -#include - -#include "grape/config.h" -#include "grape/fragment/id_parser.h" -#include "grape/serialization/in_archive.h" -#include "grape/serialization/out_archive.h" -#include "grape/worker/comm_spec.h" - -namespace grape { - -/** - * @brief VertexMapBase manages some mapping about vertices. 
- * - * a manages: - * - * 1) which fragment a vertex resides in as a inner_vertex, for edge-cut - * distributed graphs; - * - * 2) which fragment a vertex resides in as a master_vertex, - * for vertex-cut distributed graphs; - * - * 3) the mapping from ids. There are 3 kinds of vertex ids in grape. - * - * - original_id (a.k.a., OID), is provided by the origin dataset, it may be - * not continoues, or even strings. - * - * - local_id (a.k.a., LID), is allocated WITHIN a fragment, it is continoues - * and increased from 1. - * - * - global_id (a.k.a., GID), is unique in the distributed graph and works as - * the identifier of a vertex in libgrape-lite. It consists of two parts and - * formatted as fid|local_id. - * - * @note: The pure virtual functions in the class work as interfaces, - * instructing sub-classes to implement. The override functions in the derived - * classes would be invoked directly, not via virtual functions. - * - * @tparam OID_T - * @tparam VID_T - */ -template -class VertexMapBase { - public: - using partitioner_t = PARTITIONER_T; - using oid_t = OID_T; - using vid_t = VID_T; - explicit VertexMapBase(const CommSpec& comm_spec) - : comm_spec_(comm_spec), partitioner_() { - comm_spec_.Dup(); - id_parser_.init(comm_spec_.fnum()); - } - virtual ~VertexMapBase() = default; - - void SetPartitioner(const PARTITIONER_T& partitioner) { - partitioner_ = partitioner; - } - - void SetPartitioner(PARTITIONER_T&& partitioner) { - partitioner_ = std::move(partitioner); - } - - fid_t GetFragmentNum() const { return comm_spec_.fnum(); } - - VID_T Lid2Gid(fid_t fid, const VID_T& lid) const { - return id_parser_.generate_global_id(fid, lid); - } - - fid_t GetFidFromGid(const VID_T& gid) const { - return id_parser_.get_fragment_id(gid); - } - - VID_T GetLidFromGid(const VID_T& gid) const { - return id_parser_.get_local_id(gid); - } - - VID_T MaxVertexNum() const { return id_parser_.max_local_id(); } - - const CommSpec& GetCommSpec() const { return comm_spec_; } - 
- template - void serialize(std::unique_ptr& writer) { - partitioner_.template serialize(writer); - } - - template - void deserialize(std::unique_ptr& reader) { - id_parser_.init(comm_spec_.fnum()); - partitioner_.template deserialize(reader); - } - - fid_t GetFragmentId(const OID_T& oid) const { - return partitioner_.GetPartitionId(oid); - } - - const PARTITIONER_T& GetPartitioner() const { return partitioner_; } - - PARTITIONER_T& GetPartitioner() { return partitioner_; } - - protected: - CommSpec comm_spec_; - PARTITIONER_T partitioner_; - IdParser id_parser_; - - public: - // get metadata of the graph. - virtual size_t GetTotalVertexSize() const = 0; - virtual size_t GetInnerVertexSize(fid_t fid) const = 0; - - // for constructing the vertexmap. - virtual void AddVertex(const OID_T& oid) = 0; - virtual bool AddVertex(const OID_T& oid, VID_T& gid) = 0; - - virtual void UpdateToBalance(std::vector& vnum_list, - std::vector>& gid_maps) = 0; - - // convert the vertex ids with the help of mappings. 
- virtual bool GetOid(const VID_T& gid, OID_T& oid) const = 0; - - virtual bool GetOid(fid_t fid, const VID_T& lid, OID_T& oid) const = 0; - - virtual bool GetGid(fid_t fid, const OID_T& oid, VID_T& gid) const = 0; - - virtual bool GetGid(const OID_T& oid, VID_T& gid) const = 0; -}; - -} // namespace grape - -#endif // GRAPE_VERTEX_MAP_VERTEX_MAP_BASE_H_ diff --git a/misc/app_tests.sh b/misc/app_tests.sh index 4e27305f..f9d1bbc9 100755 --- a/misc/app_tests.sh +++ b/misc/app_tests.sh @@ -99,7 +99,7 @@ function BasicTests() { RunApp ${np} cdlp_auto --cdlp_mr=10 ExactVerify ${GRAPE_HOME}/dataset/${GRAPH}-CDLP - RunApp ${np} lcc --serialize=true --serialization_prefix=./serial/${GRAPH} + RunApp ${np} lcc --deserialize=true --serialization_prefix=./serial/${GRAPH} ExactVerify ${GRAPE_HOME}/dataset/${GRAPH}-LCC RunApp ${np} lcc_auto --deserialize=true --serialization_prefix=./serial/${GRAPH} @@ -163,10 +163,20 @@ function MutableFragmentTests() { WCCVerify ${GRAPE_HOME}/dataset/${GRAPH}-WCC } -function VertexMapTest() { +function LoadTest() { NP=$1; shift - cmd="mpirun -n ${NP} ./vertex_map_tests --vfile ${GRAPE_HOME}/dataset/${GRAPH}.v --efile ${GRAPE_HOME}/dataset/${GRAPH}.e --out_prefix ./extra_tests_output --sssp_source=6 $@" + cmd="mpirun -n ${NP} ./load_tests --vfile ${GRAPE_HOME}/dataset/${GRAPH}.v --efile ${GRAPE_HOME}/dataset/${GRAPH}.e --out_prefix ./extra_tests_output --sssp_source=6 $@" + + echo ${cmd} + eval ${cmd} +} + +function VertexMapTest() { + NP=$1; + + rm -rf ./vm_serial + cmd="mpirun -n ${NP} ./vertex_map_tests --vfile ${GRAPE_HOME}/dataset/${GRAPH}.v --efile ${GRAPE_HOME}/dataset/${GRAPH}.e --mutable_efile_base ${GRAPE_HOME}/dataset/${GRAPH}.e.mutable_base --mutable_efile_delta ${GRAPE_HOME}/dataset/${GRAPH}.e.mutable_delta --serialization_prefix=./vm_serial/${GRAPH}" echo ${cmd} eval ${cmd} @@ -181,38 +191,32 @@ function VertexMapTestOnMutableFragment() { eval ${cmd} } -function VertexMapTests() { +function LoadTests() { np=$1; shift - 
VertexMapTest ${np} --string_id + LoadTest ${np} --loader_type basic ExactVerify ${GRAPE_HOME}/dataset/${GRAPH}-SSSP - - VertexMapTest ${np} --nosegmented_partition + LoadTest ${np} --loader_type rb ExactVerify ${GRAPE_HOME}/dataset/${GRAPH}-SSSP - - VertexMapTest ${np} --string_id --nosegmented_partition + LoadTest ${np} --loader_type efile ExactVerify ${GRAPE_HOME}/dataset/${GRAPH}-SSSP - - VertexMapTest ${np} --noglobal_vertex_map + LoadTest ${np} --loader_type local ExactVerify ${GRAPE_HOME}/dataset/${GRAPH}-SSSP - VertexMapTest ${np} --string_id --noglobal_vertex_map + LoadTest ${np} --loader_type basic --string_id ExactVerify ${GRAPE_HOME}/dataset/${GRAPH}-SSSP - - VertexMapTest ${np} --nosegmented_partition --noglobal_vertex_map + LoadTest ${np} --loader_type rb --string_id ExactVerify ${GRAPE_HOME}/dataset/${GRAPH}-SSSP - - VertexMapTest ${np} --string_id --nosegmented_partition --noglobal_vertex_map + LoadTest ${np} --loader_type efile --string_id ExactVerify ${GRAPE_HOME}/dataset/${GRAPH}-SSSP - - VertexMapTestOnMutableFragment ${np} --string_id + LoadTest ${np} --loader_type local --string_id ExactVerify ${GRAPE_HOME}/dataset/${GRAPH}-SSSP +} - VertexMapTestOnMutableFragment ${np} --nosegmented_partition - ExactVerify ${GRAPE_HOME}/dataset/${GRAPH}-SSSP +function VertexMapTests() { + np=$1; shift - VertexMapTestOnMutableFragment ${np} --string_id --nosegmented_partition - ExactVerify ${GRAPE_HOME}/dataset/${GRAPH}-SSSP + VertexMapTest ${np} } pushd ${GRAPE_HOME}/build @@ -230,6 +234,7 @@ for np in ${proc_list}; do BasicTests ${np} MutableFragmentTests ${np} VertexMapTests ${np} + LoadTests ${np} done popd diff --git a/misc/cuda_app_tests.sh b/misc/cuda_app_tests.sh index 799a3545..1ef8a383 100755 --- a/misc/cuda_app_tests.sh +++ b/misc/cuda_app_tests.sh @@ -61,7 +61,7 @@ function RunAppWithELoader() { NP=$1; shift APP=$1; shift - cmd="mpirun -n ${NP} ./run_cuda_app --efile ${GRAPE_HOME}/dataset/${GRAPH}.e --application ${APP} --out_prefix 
./extra_tests_output --nosegmented_partition $@" + cmd="mpirun -n ${NP} ./run_cuda_app --efile ${GRAPE_HOME}/dataset/${GRAPH}.e --application ${APP} --out_prefix ./extra_tests_output --partitioner_type=hash $@" echo ${cmd} eval ${cmd} } @@ -117,16 +117,16 @@ for np in ${proc_list}; do RunApp ${np} wcc -lb=${lb} ${SER} --serialization_prefix=./serial/${GRAPH}_wcc WCCVerify ${GRAPE_HOME}/dataset/${GRAPH}-WCC - RunApp ${np} wcc_opt -segmented_partition=true -rebalance=true -lb=${lb} ${SER} --serialization_prefix=./serial/${GRAPH}_wcc_opt_tt + RunApp ${np} wcc_opt -rebalance=true -lb=${lb} ${SER} --serialization_prefix=./serial/${GRAPH}_wcc_opt_tt WCCVerify ${GRAPE_HOME}/dataset/${GRAPH}-WCC - RunApp ${np} wcc_opt -segmented_partition=true -rebalance=false -lb=${lb} ${SER} --serialization_prefix=./serial/${GRAPH}_wcc_opt_tf + RunApp ${np} wcc_opt -rebalance=false -lb=${lb} ${SER} --serialization_prefix=./serial/${GRAPH}_wcc_opt_tf WCCVerify ${GRAPE_HOME}/dataset/${GRAPH}-WCC - RunApp ${np} wcc_opt -segmented_partition=false -rebalance=true -lb=${lb} ${SER} --serialization_prefix=./serial/${GRAPH}_wcc_opt_ft + RunApp ${np} wcc_opt --partitioner_type=hash -rebalance=true -lb=${lb} ${SER} --serialization_prefix=./serial/${GRAPH}_wcc_opt_ft WCCVerify ${GRAPE_HOME}/dataset/${GRAPH}-WCC - RunApp ${np} wcc_opt -segmented_partition=false -rebalance=false -lb=${lb} ${SER} --serialization_prefix=./serial/${GRAPH}_wcc_opt_ff + RunApp ${np} wcc_opt --partitioner_type=hash -rebalance=false -lb=${lb} ${SER} --serialization_prefix=./serial/${GRAPH}_wcc_opt_ff WCCVerify ${GRAPE_HOME}/dataset/${GRAPH}-WCC done done diff --git a/misc/load_tests.cc b/misc/load_tests.cc new file mode 100644 index 00000000..f69dc068 --- /dev/null +++ b/misc/load_tests.cc @@ -0,0 +1,178 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "sssp/sssp.h" + +#ifndef __AFFINITY__ +#define __AFFINITY__ false +#endif + +DEFINE_string(efile, "", "edge file"); +DEFINE_string(vfile, "", "vertex file"); +DEFINE_string(out_prefix, "", "output directory of results"); +DEFINE_int64(sssp_source, 0, "source vertex of sssp."); +DEFINE_string(loader_type, "basic", "loader type: basic, rb, efile or local"); +DEFINE_bool(string_id, false, "whether to use string as origin id"); + +void Init() { + if (FLAGS_out_prefix.empty()) { + LOG(FATAL) << "Please assign an output prefix."; + } + if (FLAGS_efile.empty()) { + LOG(FATAL) << "Please assign input edge files."; + } + if (access(FLAGS_out_prefix.c_str(), 0) != 0) { + mkdir(FLAGS_out_prefix.c_str(), 0777); + } + + grape::InitMPIComm(); + grape::CommSpec comm_spec; + comm_spec.Init(MPI_COMM_WORLD); + if (comm_spec.worker_id() == grape::kCoordinatorRank) { + VLOG(1) << "Workers of libgrape-lite initialized."; + } +} + +void Finalize() { + grape::FinalizeMPIComm(); + VLOG(1) << "Workers finalized."; +} + +template +void DoQuery(std::shared_ptr fragment, std::shared_ptr app, + const grape::CommSpec& comm_spec, + const grape::ParallelEngineSpec& spec, + const std::string& out_prefix, Args... 
args) { + auto worker = APP_T::CreateWorker(app, fragment); + worker->Init(comm_spec, spec); + worker->Query(std::forward(args)...); + + std::ofstream ostream; + std::string output_path = + grape::GetResultFilename(out_prefix, fragment->fid()); + ostream.open(output_path); + worker->Output(ostream); + ostream.close(); + worker->Finalize(); +} + +template +struct ParamConverter {}; + +template <> +struct ParamConverter { + static int64_t FromInt64(int64_t val) { return val; } +}; + +template <> +struct ParamConverter { + static std::string FromInt64(int64_t val) { return std::to_string(val); } +}; + +template class APP_T, + typename... Args> +void CreateAndQuery(const grape::CommSpec& comm_spec, + const grape::LoadGraphSpec& graph_spec, + const std::string& out_prefix, + const grape::ParallelEngineSpec& spec, Args... args) { + using FRAG_T = grape::ImmutableEdgecutFragment; + std::shared_ptr fragment = + grape::LoadGraph(FLAGS_efile, FLAGS_vfile, comm_spec, graph_spec); + using AppType = APP_T; + auto app = std::make_shared(); + DoQuery(fragment, app, comm_spec, spec, out_prefix, + args...); +} + +int main(int argc, char* argv[]) { + FLAGS_stderrthreshold = 0; + + grape::gflags::SetUsageMessage( + "Usage: mpiexec [mpi_opts] ./run_app [grape_opts]"); + if (argc == 1) { + grape::gflags::ShowUsageWithFlagsRestrict(argv[0], "analytical_apps"); + exit(1); + } + grape::gflags::ParseCommandLineFlags(&argc, &argv, true); + grape::gflags::ShutDownCommandLineFlags(); + + google::InitGoogleLogging("analytical_apps"); + google::InstallFailureSignalHandler(); + + Init(); + + { + grape::CommSpec comm_spec; + comm_spec.Init(MPI_COMM_WORLD); + grape::LoadGraphSpec graph_spec = grape::DefaultLoadGraphSpec(); + graph_spec.set_directed(false); + if (FLAGS_loader_type == "rb") { + graph_spec.set_rebalance(true, 0); + graph_spec.partitioner_type = grape::PartitionerType::kMapPartitioner; + // idxer_type = kMapIdxer; + } else if (FLAGS_loader_type == "efile") { + FLAGS_vfile = ""; + 
graph_spec.set_rebalance(false, 0); + graph_spec.partitioner_type = grape::PartitionerType::kHashPartitioner; + // idxer_type = kMapIdxer; + } else if (FLAGS_loader_type == "local") { + graph_spec.set_rebalance(false, 0); + graph_spec.partitioner_type = grape::PartitionerType::kHashPartitioner; + graph_spec.idxer_type = grape::IdxerType::kLocalIdxer; + } else { + CHECK_EQ(FLAGS_loader_type, "basic"); + graph_spec.set_rebalance(false, 0); + + // partitioner_type = kMapPartitioner; + // idxer_type = kMapIdxer; + } + if (FLAGS_string_id) { + CreateAndQuery( + comm_spec, graph_spec, FLAGS_out_prefix, + grape::DefaultParallelEngineSpec(), + ParamConverter::FromInt64(FLAGS_sssp_source)); + } else { + CreateAndQuery( + comm_spec, graph_spec, FLAGS_out_prefix, + grape::DefaultParallelEngineSpec(), + ParamConverter::FromInt64(FLAGS_sssp_source)); + } + } + Finalize(); + + google::ShutdownGoogleLogging(); +} diff --git a/misc/mutable_fragment_tests.cc b/misc/mutable_fragment_tests.cc new file mode 100644 index 00000000..a018845f --- /dev/null +++ b/misc/mutable_fragment_tests.cc @@ -0,0 +1,271 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +#include "bfs/bfs.h" +#include "bfs/bfs_auto.h" +#include "cdlp/cdlp.h" +#include "cdlp/cdlp_auto.h" +#include "flags.h" +#include "lcc/lcc.h" +#include "lcc/lcc_auto.h" +#include "pagerank/pagerank.h" +#include "pagerank/pagerank_auto.h" +#include "pagerank/pagerank_local.h" +#include "pagerank/pagerank_local_parallel.h" +#include "pagerank/pagerank_parallel.h" +#include "sssp/sssp.h" +#include "sssp/sssp_auto.h" +#include "timer.h" +#include "wcc/wcc.h" +#include "wcc/wcc_auto.h" + +#ifndef __AFFINITY__ +#define __AFFINITY__ false +#endif + +DEFINE_string(efile, "", "edge file"); +DEFINE_string(vfile, "", "vertex file"); +DEFINE_string(delta_efile, "", "delta edge file"); +DEFINE_string(delta_vfile, "", "delta vertex file"); +DEFINE_string(out_prefix, "", "output directory of results"); +DEFINE_int64(bfs_source, 0, "source vertex of bfs."); +DEFINE_int32(cdlp_mr, 10, "max rounds of cdlp."); +DEFINE_int64(sssp_source, 0, "source vertex of sssp."); +DEFINE_double(pr_d, 0.85, "damping_factor of pagerank"); +DEFINE_int32(pr_mr, 10, "max rounds of pagerank"); +DEFINE_bool(directed, false, "input graph is directed or not."); +DEFINE_string(application, "", "application name"); + +void Init() { + if (FLAGS_out_prefix.empty()) { + LOG(FATAL) << "Please assign an output prefix."; + } + if (FLAGS_efile.empty()) { + LOG(FATAL) << "Please assign input edge files."; + } + if (access(FLAGS_out_prefix.c_str(), 0) != 0) { + mkdir(FLAGS_out_prefix.c_str(), 0777); + } + + grape::InitMPIComm(); + grape::CommSpec comm_spec; + comm_spec.Init(MPI_COMM_WORLD); + if (comm_spec.worker_id() == grape::kCoordinatorRank) { + VLOG(1) << "Workers of libgrape-lite initialized."; + } +} + +void Finalize() { + grape::FinalizeMPIComm(); + VLOG(1) << "Workers finalized."; +} + +template +void DoQuery(std::shared_ptr fragment, std::shared_ptr app, + const grape::CommSpec& 
comm_spec, + const grape::ParallelEngineSpec& spec, + const std::string& out_prefix, Args... args) { + timer_next("load application"); + auto worker = APP_T::CreateWorker(app, fragment); + worker->Init(comm_spec, spec); + timer_next("run algorithm"); + worker->Query(std::forward(args)...); + timer_next("print output"); + + std::ofstream ostream; + std::string output_path = + grape::GetResultFilename(out_prefix, fragment->fid()); + ostream.open(output_path); + worker->Output(ostream); + ostream.close(); + worker->Finalize(); + timer_end(); + VLOG(1) << "Worker-" << comm_spec.worker_id() << " finished: " << output_path; +} + +template +struct ParamConverter {}; + +template <> +struct ParamConverter { + static int64_t FromInt64(int64_t val) { return val; } +}; + +template <> +struct ParamConverter { + static std::string FromInt64(int64_t val) { return std::to_string(val); } +}; + +template class APP_T, + typename... Args> +void CreateAndQuery(const grape::CommSpec& comm_spec, + const std::string& out_prefix, int fnum, + const grape::ParallelEngineSpec& spec, Args... args) { + timer_next("load graph"); + grape::LoadGraphSpec graph_spec = grape::DefaultLoadGraphSpec(); + graph_spec.set_directed(FLAGS_directed); + graph_spec.set_rebalance(false, 0); + graph_spec.idxer_type = grape::IdxerType::kHashMapIdxer; + using FRAG_T = grape::MutableEdgecutFragment; + std::shared_ptr fragment = grape::LoadGraphAndMutate( + FLAGS_efile, FLAGS_vfile, FLAGS_delta_efile, FLAGS_delta_vfile, comm_spec, + graph_spec); + using AppType = APP_T; + auto app = std::make_shared(); + DoQuery(fragment, app, comm_spec, spec, out_prefix, + args...); +} + +template +void Run() { + grape::CommSpec comm_spec; + comm_spec.Init(MPI_COMM_WORLD); + + bool is_coordinator = comm_spec.worker_id() == grape::kCoordinatorRank; + timer_start(is_coordinator); + + // FIXME: no barrier apps. more manager? or use a dynamic-cast. 
+ std::string efile = FLAGS_efile; + std::string vfile = FLAGS_vfile; + std::string delta_efile = FLAGS_delta_efile; + std::string delta_vfile = FLAGS_delta_vfile; + std::string out_prefix = FLAGS_out_prefix; + auto spec = grape::MultiProcessSpec(comm_spec, __AFFINITY__); + int fnum = comm_spec.fnum(); + std::string name = FLAGS_application; + if (name.find("sssp") != std::string::npos) { + if (name == "sssp") { + CreateAndQuery( + comm_spec, out_prefix, fnum, spec, + ParamConverter::FromInt64(FLAGS_sssp_source)); + } else if (name == "sssp_auto") { + CreateAndQuery( + comm_spec, out_prefix, fnum, spec, + ParamConverter::FromInt64(FLAGS_sssp_source)); + } else { + LOG(FATAL) << "No avaiable application named [" << name << "]."; + } + } else { + if (name == "bfs") { + CreateAndQuery( + comm_spec, out_prefix, fnum, spec, + ParamConverter::FromInt64(FLAGS_bfs_source)); + } else if (name == "bfs_auto") { + CreateAndQuery( + comm_spec, out_prefix, fnum, spec, + ParamConverter::FromInt64(FLAGS_bfs_source)); + } else if (name == "pagerank_local") { + CreateAndQuery(comm_spec, out_prefix, fnum, spec, FLAGS_pr_d, + FLAGS_pr_mr); + } else if (name == "pagerank_local_parallel") { + CreateAndQuery( + comm_spec, out_prefix, fnum, spec, FLAGS_pr_d, FLAGS_pr_mr); + } else if (name == "pagerank") { + CreateAndQuery(comm_spec, out_prefix, fnum, spec, FLAGS_pr_d, + FLAGS_pr_mr); + } else if (name == "pagerank_auto") { + CreateAndQuery(comm_spec, out_prefix, fnum, spec, FLAGS_pr_d, + FLAGS_pr_mr); + } else if (name == "pagerank_parallel") { + CreateAndQuery(comm_spec, out_prefix, fnum, spec, FLAGS_pr_d, + FLAGS_pr_mr); + } else if (name == "cdlp") { + CreateAndQuery( + comm_spec, out_prefix, fnum, spec, FLAGS_cdlp_mr); + } else if (name == "cdlp_auto") { + CreateAndQuery( + comm_spec, out_prefix, fnum, spec, FLAGS_cdlp_mr); + } else if (name == "wcc") { + CreateAndQuery( + comm_spec, out_prefix, fnum, spec); + } else if (name == "wcc_auto") { + CreateAndQuery( + comm_spec, 
out_prefix, fnum, spec); + } else if (name == "lcc") { + CreateAndQuery( + comm_spec, out_prefix, fnum, spec); + } else if (name == "lcc_auto") { + CreateAndQuery( + comm_spec, out_prefix, fnum, spec); + } else { + LOG(FATAL) << "No avaiable application named [" << name << "]."; + } + } +} + +int main(int argc, char* argv[]) { + FLAGS_stderrthreshold = 0; + + grape::gflags::SetUsageMessage( + "Usage: mpiexec [mpi_opts] ./run_app [grape_opts]"); + if (argc == 1) { + grape::gflags::ShowUsageWithFlagsRestrict(argv[0], "analytical_apps"); + exit(1); + } + grape::gflags::ParseCommandLineFlags(&argc, &argv, true); + grape::gflags::ShutDownCommandLineFlags(); + + google::InitGoogleLogging("analytical_apps"); + google::InstallFailureSignalHandler(); + + Init(); + + std::string name = FLAGS_application; + if (name.find("sssp") != std::string::npos) { + Run(); + } else { + Run(); + } + + Finalize(); + + google::ShutdownGoogleLogging(); +} diff --git a/misc/vertex_map_tests.cc b/misc/vertex_map_tests.cc new file mode 100644 index 00000000..2977c442 --- /dev/null +++ b/misc/vertex_map_tests.cc @@ -0,0 +1,330 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#ifndef __AFFINITY__ +#define __AFFINITY__ false +#endif + +DEFINE_string(efile, "", "edge file"); +DEFINE_string(vfile, "", "vertex file"); +DEFINE_string(mutable_efile_base, "", "base of mutable edge file"); +DEFINE_string(mutable_efile_delta, "", "delta of mutable edge file"); +DEFINE_string(serialization_prefix, "", + "directory to place serialization files"); + +void Init() { + if (FLAGS_efile.empty()) { + LOG(FATAL) << "Please assign input edge files."; + } + if (access(FLAGS_serialization_prefix.c_str(), 0) != 0) { + mkdir(FLAGS_serialization_prefix.c_str(), 0777); + } + + grape::InitMPIComm(); + grape::CommSpec comm_spec; + comm_spec.Init(MPI_COMM_WORLD); + if (comm_spec.worker_id() == grape::kCoordinatorRank) { + VLOG(1) << "Workers of libgrape-lite initialized."; + } +} + +void Finalize() { + grape::FinalizeMPIComm(); + VLOG(1) << "Workers finalized."; +} + +template +bool verify_vertex_map(const grape::CommSpec& comm_spec, + const grape::VertexMap& vertex_map) { + grape::fid_t fnum = comm_spec.fnum(); + std::vector>> all_maps_g2o(fnum); + std::vector>> all_maps_o2g(fnum); + bool ret = true; + for (grape::fid_t fid = 0; fid != fnum; ++fid) { + VID_T frag_vnum = vertex_map.GetInnerVertexSize(fid); + for (VID_T lid = 0; lid < frag_vnum; ++lid) { + OID_T oid_a, oid_b; + if (vertex_map.GetOid(fid, lid, oid_a)) { + VID_T gid_a{}, gid_b{}; + if (!vertex_map.GetGid(fid, oid_a, gid_a)) { + LOG(ERROR) << "Vertex " << oid_a << " not found by fid+oid in vertex " + << "map."; + ret = false; + continue; + } + if (!vertex_map.GetGid(oid_a, gid_b)) { + LOG(ERROR) << "Vertex " << oid_a << " not found by oid in vertex " + << "map."; + ret = false; + continue; + } + if (gid_a != gid_b) { + LOG(ERROR) << "Vertex " << oid_a << " gid not consistent."; + ret = false; + continue; + } + if (!vertex_map.GetOid(gid_a, 
oid_b)) { + LOG(ERROR) << "Vertex " << gid_a << " not found by gid in vertex " + << "map."; + ret = false; + continue; + } + if (oid_a != oid_b) { + LOG(ERROR) << "Vertex " << gid_a << " oid not consistent."; + ret = false; + continue; + } + all_maps_g2o[gid_a % fnum].emplace_back(gid_a, oid_a); + all_maps_o2g[std::hash{}(oid_a) % fnum].emplace_back(oid_a, + gid_a); + } + } + } + + { + std::vector>> all_maps_g2o_in(fnum); + grape::sync_comm::AllToAll(all_maps_g2o, all_maps_g2o_in, comm_spec.comm()); + + std::vector> all_maps_merged; + for (auto& maps : all_maps_g2o_in) { + all_maps_merged.insert(all_maps_merged.end(), maps.begin(), maps.end()); + } + + std::sort(all_maps_merged.begin(), all_maps_merged.end()); + for (size_t i = 1; i < all_maps_merged.size(); ++i) { + if (all_maps_merged[i].first == all_maps_merged[i - 1].first) { + if (all_maps_merged[i].second != all_maps_merged[i - 1].second) { + LOG(ERROR) << "Vertex " << all_maps_merged[i].first + << " has different oid in different fragments."; + ret = false; + } + } + } + } + + { + std::vector>> all_maps_o2g_in(fnum); + grape::sync_comm::AllToAll(all_maps_o2g, all_maps_o2g_in, comm_spec.comm()); + + std::vector> all_maps_merged; + for (auto& maps : all_maps_o2g_in) { + all_maps_merged.insert(all_maps_merged.end(), maps.begin(), maps.end()); + } + + std::sort(all_maps_merged.begin(), all_maps_merged.end()); + for (size_t i = 1; i < all_maps_merged.size(); ++i) { + if (all_maps_merged[i].first == all_maps_merged[i - 1].first) { + if (all_maps_merged[i].second != all_maps_merged[i - 1].second) { + LOG(ERROR) << "Vertex " << all_maps_merged[i].first + << " has different gid in different fragments."; + ret = false; + } + } + } + } + + return ret; +} + +template +bool verify_fragment_vertex_map(const FRAG_T& frag, + const VERTEX_MAP_T& vertex_map) { + auto inner_vertices = frag.InnerVertices(); + auto outer_vertices = frag.OuterVertices(); + using vid_t = typename FRAG_T::vid_t; + using oid_t = typename 
FRAG_T::oid_t; + for (auto v : inner_vertices) { + vid_t gid = frag.GetInnerVertexGid(v); + oid_t oid; + if (!vertex_map.GetOid(gid, oid)) { + LOG(ERROR) << "Vertex " << gid << " not found in vertex map."; + return false; + } + } + for (auto v : outer_vertices) { + vid_t gid = frag.GetOuterVertexGid(v); + oid_t oid; + if (!vertex_map.GetOid(gid, oid)) { + LOG(ERROR) << "Vertex " << gid << " not found in vertex map."; + return false; + } + } + return true; +} + +template +void test_build_vertex_map(const std::string& efile, const std::string& vfile, + const grape::LoadGraphSpec& graph_spec, + const grape::CommSpec& comm_spec) { + using FRAG_T = + grape::ImmutableEdgecutFragment; + std::shared_ptr fragment = + grape::LoadGraph(efile, vfile, comm_spec, graph_spec); + + verify_fragment_vertex_map(*fragment, fragment->GetVertexMap()); + verify_vertex_map(comm_spec, fragment->GetVertexMap()); +} + +template +void test_mutate_vertex_map(const std::string& efile_base, + const std::string& vfile, + const std::string& efile_delta, + const grape::LoadGraphSpec& graph_spec, + const grape::CommSpec& comm_spec) { + using FRAG_T = grape::MutableEdgecutFragment; + std::shared_ptr fragment = grape::LoadGraphAndMutate( + efile_base, vfile, efile_delta, "", comm_spec, graph_spec); + + verify_fragment_vertex_map(*fragment, fragment->GetVertexMap()); + verify_vertex_map(comm_spec, fragment->GetVertexMap()); +} + +int main(int argc, char* argv[]) { + FLAGS_stderrthreshold = 0; + grape::gflags::SetUsageMessage( + "Usage: mpiexec [mpi_opts] ./run_app [grape_opts]"); + if (argc == 1) { + grape::gflags::ShowUsageWithFlagsRestrict(argv[0], "vertex_map_tests"); + exit(1); + } + grape::gflags::ParseCommandLineFlags(&argc, &argv, true); + grape::gflags::ShutDownCommandLineFlags(); + + google::InitGoogleLogging("vertex_map_tests"); + google::InstallFailureSignalHandler(); + + Init(); + + std::vector string_id_options({false, true}); + std::vector rebalance_options({false, true}); + std::vector 
partitioner_options( + {grape::PartitionerType::kHashPartitioner, + grape::PartitionerType::kMapPartitioner, + grape::PartitionerType::kSegmentedPartitioner}); + std::vector idxer_options( + {grape::IdxerType::kHashMapIdxer, grape::IdxerType::kHashMapIdxerView, + grape::IdxerType::kPTHashIdxer, grape::IdxerType::kSortedArrayIdxer, + grape::IdxerType::kLocalIdxer}); + + { + grape::CommSpec comm_spec; + comm_spec.Init(MPI_COMM_WORLD); + int idx = 0; + for (auto string_id : string_id_options) { + for (auto rebalance : rebalance_options) { + for (auto partitioner_type : partitioner_options) { + for (auto idxer_type : idxer_options) { + if (rebalance) { + if (partitioner_type == + grape::PartitionerType::kHashPartitioner) { + continue; + } + } + if (idxer_type == grape::IdxerType::kLocalIdxer) { + if (partitioner_type != + grape::PartitionerType::kHashPartitioner) { + continue; + } + } + bool vm_extendable = + (idxer_type == grape::IdxerType::kHashMapIdxer || + idxer_type == grape::IdxerType::kLocalIdxer); + VLOG(2) << "Test " << idx++ << ": string_id=" << string_id + << ", rebalance=" << rebalance + << ", partitioner_type=" << partitioner_type + << ", idxer_type=" << idxer_type; + grape::LoadGraphSpec graph_spec = grape::DefaultLoadGraphSpec(); + graph_spec.set_directed(false); + if (rebalance) { + graph_spec.set_rebalance(true, 0); + } else { + graph_spec.set_rebalance(false, 0); + } + graph_spec.partitioner_type = partitioner_type; + graph_spec.idxer_type = idxer_type; + + graph_spec.set_serialize(true, FLAGS_serialization_prefix); + if (string_id) { + test_build_vertex_map(FLAGS_efile, FLAGS_vfile, + graph_spec, comm_spec); + if (vm_extendable) { + test_mutate_vertex_map( + FLAGS_mutable_efile_base, FLAGS_vfile, + FLAGS_mutable_efile_delta, graph_spec, comm_spec); + } + } else { + test_build_vertex_map(FLAGS_efile, FLAGS_vfile, + graph_spec, comm_spec); + if (vm_extendable) { + test_mutate_vertex_map( + FLAGS_mutable_efile_base, FLAGS_vfile, + 
FLAGS_mutable_efile_delta, graph_spec, comm_spec); + } + } + + graph_spec.set_deserialize(true, FLAGS_serialization_prefix); + if (string_id) { + test_build_vertex_map(FLAGS_efile, FLAGS_vfile, + graph_spec, comm_spec); + if (vm_extendable) { + test_mutate_vertex_map( + FLAGS_mutable_efile_base, FLAGS_vfile, + FLAGS_mutable_efile_delta, graph_spec, comm_spec); + } + } else { + test_build_vertex_map(FLAGS_efile, FLAGS_vfile, + graph_spec, comm_spec); + if (vm_extendable) { + test_mutate_vertex_map( + FLAGS_mutable_efile_base, FLAGS_vfile, + FLAGS_mutable_efile_delta, graph_spec, comm_spec); + } + } + } + } + } + } + } + + Finalize(); + + google::ShutdownGoogleLogging(); +} diff --git a/tests/load_tests.cc b/tests/load_tests.cc new file mode 100644 index 00000000..f69dc068 --- /dev/null +++ b/tests/load_tests.cc @@ -0,0 +1,178 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "sssp/sssp.h" + +#ifndef __AFFINITY__ +#define __AFFINITY__ false +#endif + +DEFINE_string(efile, "", "edge file"); +DEFINE_string(vfile, "", "vertex file"); +DEFINE_string(out_prefix, "", "output directory of results"); +DEFINE_int64(sssp_source, 0, "source vertex of sssp."); +DEFINE_string(loader_type, "basic", "loader type: basic, rb, efile or local"); +DEFINE_bool(string_id, false, "whether to use string as origin id"); + +void Init() { + if (FLAGS_out_prefix.empty()) { + LOG(FATAL) << "Please assign an output prefix."; + } + if (FLAGS_efile.empty()) { + LOG(FATAL) << "Please assign input edge files."; + } + if (access(FLAGS_out_prefix.c_str(), 0) != 0) { + mkdir(FLAGS_out_prefix.c_str(), 0777); + } + + grape::InitMPIComm(); + grape::CommSpec comm_spec; + comm_spec.Init(MPI_COMM_WORLD); + if (comm_spec.worker_id() == grape::kCoordinatorRank) { + VLOG(1) << "Workers of libgrape-lite initialized."; + } +} + +void Finalize() { + grape::FinalizeMPIComm(); + VLOG(1) << "Workers finalized."; +} + +template +void DoQuery(std::shared_ptr fragment, std::shared_ptr app, + const grape::CommSpec& comm_spec, + const grape::ParallelEngineSpec& spec, + const std::string& out_prefix, Args... 
args) { + auto worker = APP_T::CreateWorker(app, fragment); + worker->Init(comm_spec, spec); + worker->Query(std::forward(args)...); + + std::ofstream ostream; + std::string output_path = + grape::GetResultFilename(out_prefix, fragment->fid()); + ostream.open(output_path); + worker->Output(ostream); + ostream.close(); + worker->Finalize(); +} + +template +struct ParamConverter {}; + +template <> +struct ParamConverter { + static int64_t FromInt64(int64_t val) { return val; } +}; + +template <> +struct ParamConverter { + static std::string FromInt64(int64_t val) { return std::to_string(val); } +}; + +template class APP_T, + typename... Args> +void CreateAndQuery(const grape::CommSpec& comm_spec, + const grape::LoadGraphSpec& graph_spec, + const std::string& out_prefix, + const grape::ParallelEngineSpec& spec, Args... args) { + using FRAG_T = grape::ImmutableEdgecutFragment; + std::shared_ptr fragment = + grape::LoadGraph(FLAGS_efile, FLAGS_vfile, comm_spec, graph_spec); + using AppType = APP_T; + auto app = std::make_shared(); + DoQuery(fragment, app, comm_spec, spec, out_prefix, + args...); +} + +int main(int argc, char* argv[]) { + FLAGS_stderrthreshold = 0; + + grape::gflags::SetUsageMessage( + "Usage: mpiexec [mpi_opts] ./run_app [grape_opts]"); + if (argc == 1) { + grape::gflags::ShowUsageWithFlagsRestrict(argv[0], "analytical_apps"); + exit(1); + } + grape::gflags::ParseCommandLineFlags(&argc, &argv, true); + grape::gflags::ShutDownCommandLineFlags(); + + google::InitGoogleLogging("analytical_apps"); + google::InstallFailureSignalHandler(); + + Init(); + + { + grape::CommSpec comm_spec; + comm_spec.Init(MPI_COMM_WORLD); + grape::LoadGraphSpec graph_spec = grape::DefaultLoadGraphSpec(); + graph_spec.set_directed(false); + if (FLAGS_loader_type == "rb") { + graph_spec.set_rebalance(true, 0); + graph_spec.partitioner_type = grape::PartitionerType::kMapPartitioner; + // idxer_type = kMapIdxer; + } else if (FLAGS_loader_type == "efile") { + FLAGS_vfile = ""; + 
graph_spec.set_rebalance(false, 0); + graph_spec.partitioner_type = grape::PartitionerType::kHashPartitioner; + // idxer_type = kMapIdxer; + } else if (FLAGS_loader_type == "local") { + graph_spec.set_rebalance(false, 0); + graph_spec.partitioner_type = grape::PartitionerType::kHashPartitioner; + graph_spec.idxer_type = grape::IdxerType::kLocalIdxer; + } else { + CHECK_EQ(FLAGS_loader_type, "basic"); + graph_spec.set_rebalance(false, 0); + + // partitioner_type = kMapPartitioner; + // idxer_type = kMapIdxer; + } + if (FLAGS_string_id) { + CreateAndQuery( + comm_spec, graph_spec, FLAGS_out_prefix, + grape::DefaultParallelEngineSpec(), + ParamConverter::FromInt64(FLAGS_sssp_source)); + } else { + CreateAndQuery( + comm_spec, graph_spec, FLAGS_out_prefix, + grape::DefaultParallelEngineSpec(), + ParamConverter::FromInt64(FLAGS_sssp_source)); + } + } + Finalize(); + + google::ShutdownGoogleLogging(); +} diff --git a/tests/mutable_fragment_tests.cc b/tests/mutable_fragment_tests.cc index 7e619e39..a018845f 100644 --- a/tests/mutable_fragment_tests.cc +++ b/tests/mutable_fragment_tests.cc @@ -26,7 +26,6 @@ limitations under the License. #include #include #include -#include #include "bfs/bfs.h" #include "bfs/bfs_auto.h" @@ -133,6 +132,7 @@ void CreateAndQuery(const grape::CommSpec& comm_spec, grape::LoadGraphSpec graph_spec = grape::DefaultLoadGraphSpec(); graph_spec.set_directed(FLAGS_directed); graph_spec.set_rebalance(false, 0); + graph_spec.idxer_type = grape::IdxerType::kHashMapIdxer; using FRAG_T = grape::MutableEdgecutFragment; std::shared_ptr fragment = grape::LoadGraphAndMutate( diff --git a/tests/vertex_map_tests.cc b/tests/vertex_map_tests.cc index 03d98ee3..2977c442 100644 --- a/tests/vertex_map_tests.cc +++ b/tests/vertex_map_tests.cc @@ -28,14 +28,8 @@ limitations under the License. 
#include #include #include -#include #include #include -#include -#include - -#include "sssp/sssp.h" -#include "timer.h" #ifndef __AFFINITY__ #define __AFFINITY__ false @@ -43,26 +37,17 @@ limitations under the License. DEFINE_string(efile, "", "edge file"); DEFINE_string(vfile, "", "vertex file"); -DEFINE_string(delta_efile, "", "delta edge file"); -DEFINE_string(delta_vfile, "", "delta vertex file"); -DEFINE_string(out_prefix, "", "output directory of results"); -DEFINE_int64(sssp_source, 0, "source vertex of sssp."); -DEFINE_bool(string_id, false, "whether to use string as origin id"); -DEFINE_bool(segmented_partition, true, - "whether to use segmented partitioning."); -DEFINE_bool(rebalance, false, "whether to rebalance graph after loading."); -DEFINE_int32(rebalance_vertex_factor, 0, "vertex factor of rebalancing."); -DEFINE_bool(global_vertex_map, true, "whether to use global vertex map."); +DEFINE_string(mutable_efile_base, "", "base of mutable edge file"); +DEFINE_string(mutable_efile_delta, "", "delta of mutable edge file"); +DEFINE_string(serialization_prefix, "", + "directory to place serialization files"); void Init() { - if (FLAGS_out_prefix.empty()) { - LOG(FATAL) << "Please assign an output prefix."; - } if (FLAGS_efile.empty()) { LOG(FATAL) << "Please assign input edge files."; } - if (access(FLAGS_out_prefix.c_str(), 0) != 0) { - mkdir(FLAGS_out_prefix.c_str(), 0777); + if (access(FLAGS_serialization_prefix.c_str(), 0) != 0) { + mkdir(FLAGS_serialization_prefix.c_str(), 0777); } grape::InitMPIComm(); @@ -78,175 +63,265 @@ void Finalize() { VLOG(1) << "Workers finalized."; } -template -void DoQuery(std::shared_ptr fragment, std::shared_ptr app, - const grape::CommSpec& comm_spec, - const grape::ParallelEngineSpec& spec, - const std::string& out_prefix, Args... 
args) { - timer_next("load application"); - auto worker = APP_T::CreateWorker(app, fragment); - worker->Init(comm_spec, spec); - timer_next("run algorithm"); - worker->Query(std::forward(args)...); - timer_next("print output"); - - std::ofstream ostream; - std::string output_path = - grape::GetResultFilename(out_prefix, fragment->fid()); - ostream.open(output_path); - worker->Output(ostream); - ostream.close(); - worker->Finalize(); - timer_end(); - VLOG(1) << "Worker-" << comm_spec.worker_id() << " finished: " << output_path; -} +template +bool verify_vertex_map(const grape::CommSpec& comm_spec, + const grape::VertexMap& vertex_map) { + grape::fid_t fnum = comm_spec.fnum(); + std::vector>> all_maps_g2o(fnum); + std::vector>> all_maps_o2g(fnum); + bool ret = true; + for (grape::fid_t fid = 0; fid != fnum; ++fid) { + VID_T frag_vnum = vertex_map.GetInnerVertexSize(fid); + for (VID_T lid = 0; lid < frag_vnum; ++lid) { + OID_T oid_a, oid_b; + if (vertex_map.GetOid(fid, lid, oid_a)) { + VID_T gid_a{}, gid_b{}; + if (!vertex_map.GetGid(fid, oid_a, gid_a)) { + LOG(ERROR) << "Vertex " << oid_a << " not found by fid+oid in vertex " + << "map."; + ret = false; + continue; + } + if (!vertex_map.GetGid(oid_a, gid_b)) { + LOG(ERROR) << "Vertex " << oid_a << " not found by oid in vertex " + << "map."; + ret = false; + continue; + } + if (gid_a != gid_b) { + LOG(ERROR) << "Vertex " << oid_a << " gid not consistent."; + ret = false; + continue; + } + if (!vertex_map.GetOid(gid_a, oid_b)) { + LOG(ERROR) << "Vertex " << gid_a << " not found by gid in vertex " + << "map."; + ret = false; + continue; + } + if (oid_a != oid_b) { + LOG(ERROR) << "Vertex " << gid_a << " oid not consistent."; + ret = false; + continue; + } + all_maps_g2o[gid_a % fnum].emplace_back(gid_a, oid_a); + all_maps_o2g[std::hash{}(oid_a) % fnum].emplace_back(oid_a, + gid_a); + } + } + } -template -struct ParamConverter {}; - -template <> -struct ParamConverter { - static int64_t FromInt64(int64_t val) { return 
val; } -}; - -template <> -struct ParamConverter { - static std::string FromInt64(int64_t val) { return std::to_string(val); } -}; - -template class APP_T, - typename... Args> -void CreateAndQuery(const grape::CommSpec& comm_spec, - const std::string& out_prefix, int fnum, - const grape::ParallelEngineSpec& spec, Args... args) { - timer_next("load graph"); - grape::LoadGraphSpec graph_spec = grape::DefaultLoadGraphSpec(); - graph_spec.set_directed(false); - graph_spec.set_rebalance(FLAGS_rebalance, FLAGS_rebalance_vertex_factor); - if (!FLAGS_delta_efile.empty() || !FLAGS_delta_vfile.empty()) { - graph_spec.set_rebalance(false, 0); - if (FLAGS_global_vertex_map) { - using VertexMapType = grape::GlobalVertexMap; - using FRAG_T = - grape::MutableEdgecutFragment; - std::shared_ptr fragment = grape::LoadGraphAndMutate( - FLAGS_efile, FLAGS_vfile, FLAGS_delta_efile, FLAGS_delta_vfile, - comm_spec, graph_spec); - using AppType = APP_T; - auto app = std::make_shared(); - DoQuery(fragment, app, comm_spec, spec, - out_prefix, args...); - } else { - using VertexMapType = grape::LocalVertexMap; - using FRAG_T = - grape::MutableEdgecutFragment; - std::shared_ptr fragment = grape::LoadGraphAndMutate( - FLAGS_efile, FLAGS_vfile, FLAGS_delta_efile, FLAGS_delta_vfile, - comm_spec, graph_spec); - using AppType = APP_T; - auto app = std::make_shared(); - DoQuery(fragment, app, comm_spec, spec, - out_prefix, args...); + { + std::vector>> all_maps_g2o_in(fnum); + grape::sync_comm::AllToAll(all_maps_g2o, all_maps_g2o_in, comm_spec.comm()); + + std::vector> all_maps_merged; + for (auto& maps : all_maps_g2o_in) { + all_maps_merged.insert(all_maps_merged.end(), maps.begin(), maps.end()); } - } else { - if (FLAGS_segmented_partition) { - if (FLAGS_global_vertex_map) { - using VertexMapType = - grape::GlobalVertexMap>; - using FRAG_T = - grape::ImmutableEdgecutFragment; - std::shared_ptr fragment = grape::LoadGraph( - FLAGS_efile, FLAGS_vfile, comm_spec, graph_spec); - using AppType = 
APP_T; - auto app = std::make_shared(); - DoQuery(fragment, app, comm_spec, spec, - out_prefix, args...); - } else { - using VertexMapType = - grape::LocalVertexMap>; - using FRAG_T = - grape::ImmutableEdgecutFragment; - std::shared_ptr fragment = grape::LoadGraph( - FLAGS_efile, FLAGS_vfile, comm_spec, graph_spec); - using AppType = APP_T; - auto app = std::make_shared(); - DoQuery(fragment, app, comm_spec, spec, - out_prefix, args...); + + std::sort(all_maps_merged.begin(), all_maps_merged.end()); + for (size_t i = 1; i < all_maps_merged.size(); ++i) { + if (all_maps_merged[i].first == all_maps_merged[i - 1].first) { + if (all_maps_merged[i].second != all_maps_merged[i - 1].second) { + LOG(ERROR) << "Vertex " << all_maps_merged[i].first + << " has different oid in different fragments."; + ret = false; + } } - } else { - graph_spec.set_rebalance(false, 0); - if (FLAGS_global_vertex_map) { - using VertexMapType = grape::GlobalVertexMap; - using FRAG_T = - grape::ImmutableEdgecutFragment; - std::shared_ptr fragment = grape::LoadGraph( - FLAGS_efile, FLAGS_vfile, comm_spec, graph_spec); - using AppType = APP_T; - auto app = std::make_shared(); - DoQuery(fragment, app, comm_spec, spec, - out_prefix, args...); - } else { - using VertexMapType = grape::LocalVertexMap; - using FRAG_T = - grape::ImmutableEdgecutFragment; - std::shared_ptr fragment = grape::LoadGraph( - FLAGS_efile, FLAGS_vfile, comm_spec, graph_spec); - using AppType = APP_T; - auto app = std::make_shared(); - DoQuery(fragment, app, comm_spec, spec, - out_prefix, args...); + } + } + + { + std::vector>> all_maps_o2g_in(fnum); + grape::sync_comm::AllToAll(all_maps_o2g, all_maps_o2g_in, comm_spec.comm()); + + std::vector> all_maps_merged; + for (auto& maps : all_maps_o2g_in) { + all_maps_merged.insert(all_maps_merged.end(), maps.begin(), maps.end()); + } + + std::sort(all_maps_merged.begin(), all_maps_merged.end()); + for (size_t i = 1; i < all_maps_merged.size(); ++i) { + if (all_maps_merged[i].first == 
all_maps_merged[i - 1].first) { + if (all_maps_merged[i].second != all_maps_merged[i - 1].second) { + LOG(ERROR) << "Vertex " << all_maps_merged[i].first + << " has different gid in different fragments."; + ret = false; + } } } } + + return ret; +} + +template +bool verify_fragment_vertex_map(const FRAG_T& frag, + const VERTEX_MAP_T& vertex_map) { + auto inner_vertices = frag.InnerVertices(); + auto outer_vertices = frag.OuterVertices(); + using vid_t = typename FRAG_T::vid_t; + using oid_t = typename FRAG_T::oid_t; + for (auto v : inner_vertices) { + vid_t gid = frag.GetInnerVertexGid(v); + oid_t oid; + if (!vertex_map.GetOid(gid, oid)) { + LOG(ERROR) << "Vertex " << gid << " not found in vertex map."; + return false; + } + } + for (auto v : outer_vertices) { + vid_t gid = frag.GetOuterVertexGid(v); + oid_t oid; + if (!vertex_map.GetOid(gid, oid)) { + LOG(ERROR) << "Vertex " << gid << " not found in vertex map."; + return false; + } + } + return true; } template -void Run() { - grape::CommSpec comm_spec; - comm_spec.Init(MPI_COMM_WORLD); +void test_build_vertex_map(const std::string& efile, const std::string& vfile, + const grape::LoadGraphSpec& graph_spec, + const grape::CommSpec& comm_spec) { + using FRAG_T = + grape::ImmutableEdgecutFragment; + std::shared_ptr fragment = + grape::LoadGraph(efile, vfile, comm_spec, graph_spec); + + verify_fragment_vertex_map(*fragment, fragment->GetVertexMap()); + verify_vertex_map(comm_spec, fragment->GetVertexMap()); +} - bool is_coordinator = comm_spec.worker_id() == grape::kCoordinatorRank; - timer_start(is_coordinator); +template +void test_mutate_vertex_map(const std::string& efile_base, + const std::string& vfile, + const std::string& efile_delta, + const grape::LoadGraphSpec& graph_spec, + const grape::CommSpec& comm_spec) { + using FRAG_T = grape::MutableEdgecutFragment; + std::shared_ptr fragment = grape::LoadGraphAndMutate( + efile_base, vfile, efile_delta, "", comm_spec, graph_spec); - // FIXME: no barrier apps. 
more manager? or use a dynamic-cast. - auto spec = grape::MultiProcessSpec(comm_spec, __AFFINITY__); - int fnum = comm_spec.fnum(); - CreateAndQuery( - comm_spec, FLAGS_out_prefix, fnum, spec, - ParamConverter::FromInt64(FLAGS_sssp_source)); + verify_fragment_vertex_map(*fragment, fragment->GetVertexMap()); + verify_vertex_map(comm_spec, fragment->GetVertexMap()); } int main(int argc, char* argv[]) { FLAGS_stderrthreshold = 0; - grape::gflags::SetUsageMessage( "Usage: mpiexec [mpi_opts] ./run_app [grape_opts]"); if (argc == 1) { - grape::gflags::ShowUsageWithFlagsRestrict(argv[0], "analytical_apps"); + grape::gflags::ShowUsageWithFlagsRestrict(argv[0], "vertex_map_tests"); exit(1); } grape::gflags::ParseCommandLineFlags(&argc, &argv, true); grape::gflags::ShutDownCommandLineFlags(); - google::InitGoogleLogging("analytical_apps"); + google::InitGoogleLogging("vertex_map_tests"); google::InstallFailureSignalHandler(); Init(); - if (FLAGS_string_id) { - Run(); - } else { - Run(); + std::vector string_id_options({false, true}); + std::vector rebalance_options({false, true}); + std::vector partitioner_options( + {grape::PartitionerType::kHashPartitioner, + grape::PartitionerType::kMapPartitioner, + grape::PartitionerType::kSegmentedPartitioner}); + std::vector idxer_options( + {grape::IdxerType::kHashMapIdxer, grape::IdxerType::kHashMapIdxerView, + grape::IdxerType::kPTHashIdxer, grape::IdxerType::kSortedArrayIdxer, + grape::IdxerType::kLocalIdxer}); + + { + grape::CommSpec comm_spec; + comm_spec.Init(MPI_COMM_WORLD); + int idx = 0; + for (auto string_id : string_id_options) { + for (auto rebalance : rebalance_options) { + for (auto partitioner_type : partitioner_options) { + for (auto idxer_type : idxer_options) { + if (rebalance) { + if (partitioner_type == + grape::PartitionerType::kHashPartitioner) { + continue; + } + } + if (idxer_type == grape::IdxerType::kLocalIdxer) { + if (partitioner_type != + grape::PartitionerType::kHashPartitioner) { + continue; + } + } + 
bool vm_extendable = + (idxer_type == grape::IdxerType::kHashMapIdxer || + idxer_type == grape::IdxerType::kLocalIdxer); + VLOG(2) << "Test " << idx++ << ": string_id=" << string_id + << ", rebalance=" << rebalance + << ", partitioner_type=" << partitioner_type + << ", idxer_type=" << idxer_type; + grape::LoadGraphSpec graph_spec = grape::DefaultLoadGraphSpec(); + graph_spec.set_directed(false); + if (rebalance) { + graph_spec.set_rebalance(true, 0); + } else { + graph_spec.set_rebalance(false, 0); + } + graph_spec.partitioner_type = partitioner_type; + graph_spec.idxer_type = idxer_type; + + graph_spec.set_serialize(true, FLAGS_serialization_prefix); + if (string_id) { + test_build_vertex_map(FLAGS_efile, FLAGS_vfile, + graph_spec, comm_spec); + if (vm_extendable) { + test_mutate_vertex_map( + FLAGS_mutable_efile_base, FLAGS_vfile, + FLAGS_mutable_efile_delta, graph_spec, comm_spec); + } + } else { + test_build_vertex_map(FLAGS_efile, FLAGS_vfile, + graph_spec, comm_spec); + if (vm_extendable) { + test_mutate_vertex_map( + FLAGS_mutable_efile_base, FLAGS_vfile, + FLAGS_mutable_efile_delta, graph_spec, comm_spec); + } + } + + graph_spec.set_deserialize(true, FLAGS_serialization_prefix); + if (string_id) { + test_build_vertex_map(FLAGS_efile, FLAGS_vfile, + graph_spec, comm_spec); + if (vm_extendable) { + test_mutate_vertex_map( + FLAGS_mutable_efile_base, FLAGS_vfile, + FLAGS_mutable_efile_delta, graph_spec, comm_spec); + } + } else { + test_build_vertex_map(FLAGS_efile, FLAGS_vfile, + graph_spec, comm_spec); + if (vm_extendable) { + test_mutate_vertex_map( + FLAGS_mutable_efile_base, FLAGS_vfile, + FLAGS_mutable_efile_delta, graph_spec, comm_spec); + } + } + } + } + } + } } Finalize(); diff --git a/thirdparty/flat_hash_map/flat_hash_map.hpp b/thirdparty/flat_hash_map/flat_hash_map.hpp index 67760a45..b1f5bb16 100644 --- a/thirdparty/flat_hash_map/flat_hash_map.hpp +++ b/thirdparty/flat_hash_map/flat_hash_map.hpp @@ -913,6 +913,11 @@ class sherwood_v3_table : 
private EntryAlloc, public Hasher, public Equal deallocate_data(entries, num_slots_minus_one, max_lookups); } + size_t memory_usage() const + { + return (num_slots_minus_one + max_lookups + 1) * sizeof(Entry); + } + const allocator_type & get_allocator() const { return static_cast(*this); diff --git a/thirdparty/pthash/builders/external_memory_builder_single_phf.hpp b/thirdparty/pthash/builders/external_memory_builder_single_phf.hpp new file mode 100644 index 00000000..605f4809 --- /dev/null +++ b/thirdparty/pthash/builders/external_memory_builder_single_phf.hpp @@ -0,0 +1,753 @@ +/** Copyright 2020-2024 Giulio Ermanno Pibiri and Roberto Trani + * + * The following sets forth attribution notices for third party software. + * + * PTHash: + * The software includes components licensed by Giulio Ermanno Pibiri and + * Roberto Trani, available at https://github.com/jermp/pthash + * + * Licensed under the MIT License (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://opensource.org/licenses/MIT + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "pthash/builders/search.hpp" +#include "pthash/builders/util.hpp" +#include "pthash/mm_file/mm_file.hpp" + +#include "pthash/utils/bucketers.hpp" +#include "pthash/utils/hasher.hpp" +#include "pthash/utils/logger.hpp" + +namespace pthash { + +template +struct external_memory_builder_single_phf { + typedef Hasher hasher_type; + + external_memory_builder_single_phf() + : m_pilots_filename(""), m_free_slots_filename("") {} + // non construction-copyable + external_memory_builder_single_phf( + external_memory_builder_single_phf const&) = delete; + // non copyable + external_memory_builder_single_phf& operator=( + external_memory_builder_single_phf const&) = delete; + + ~external_memory_builder_single_phf() { + if (m_pilots_filename != "") + std::remove(m_pilots_filename.c_str()); + m_pilots_filename = ""; + if (m_free_slots_filename != "") + std::remove(m_free_slots_filename.c_str()); + m_free_slots_filename = ""; + } + + template + build_timings build_from_keys(Iterator keys, uint64_t num_keys, + build_configuration const& config) { + assert(num_keys > 1); + if (config.alpha == 0 or config.alpha > 1.0) { + throw std::invalid_argument("load factor must be > 0 and <= 1.0"); + } + + build_timings time; + uint64_t table_size = static_cast(num_keys) / config.alpha; + if ((table_size & (table_size - 1)) == 0) + table_size += 1; + uint64_t num_buckets = + std::ceil((config.c * num_keys) / std::log2(num_keys)); + + if (sizeof(bucket_id_type) != sizeof(uint64_t) and + num_buckets > (1ULL << (sizeof(bucket_id_type) * 8))) { + throw std::runtime_error( + "using too many buckets: change bucket_id_type to uint64_t or use a " + "smaller c"); + } + + m_num_keys = num_keys; + m_table_size = table_size; + m_num_buckets = num_buckets; + m_seed = + config.seed == constants::invalid_seed ? 
random_value() : config.seed; + m_bucketer.init(num_buckets); + + uint64_t ram = config.ram; + + uint64_t bitmap_taken_bytes = 8 * ((table_size + 63) / 64); + uint64_t hashed_pilots_cache_bytes = search_cache_size * sizeof(uint64_t); + if (bitmap_taken_bytes + hashed_pilots_cache_bytes >= ram) { + std::stringstream ss; + ss << "not enough RAM available, the bitmap alone takes " + << static_cast(bitmap_taken_bytes) / 1000000000 + << " GB of space."; + throw std::runtime_error(ss.str()); + } + + if (config.verbose_output) { + constexpr uint64_t GB = 1000000000; + uint64_t peak = + num_keys * (sizeof(bucket_payload_pair) + sizeof(uint64_t)) + + (num_keys + num_buckets) * sizeof(uint64_t); + std::cout << "c = " << config.c << std::endl; + std::cout << "alpha = " << config.alpha << std::endl; + std::cout << "num_keys = " << num_keys << std::endl; + std::cout << "table_size = " << table_size << std::endl; + std::cout << "num_buckets = " << num_buckets << std::endl; + std::cout << "using " << static_cast(ram) / GB << " GB of RAM" + << " (" << static_cast(bitmap_taken_bytes) / GB + << " GB occupied by the bitmap)" << std::endl; + std::cout << "using a peak of " << static_cast(peak) / GB + << " GB of disk space" << std::endl; + } + + uint64_t run_identifier = clock_type::now().time_since_epoch().count(); + temporary_files_manager tfm(config.tmp_dir, run_identifier); + + uint64_t num_non_empty_buckets = 0; + + try { + auto start = clock_type::now(); + { + auto start = clock_type::now(); + std::vector pairs_blocks; + map(keys, num_keys, pairs_blocks, tfm, config); + auto stop = clock_type::now(); + if (config.verbose_output) { + std::cout << " == map+sort " << tfm.get_num_pairs_files() + << " files(s) took: " << seconds(stop - start) << " seconds" + << std::endl; + } + start = clock_type::now(); + buckets_t buckets = tfm.buckets(config); + merge(pairs_blocks, buckets, config.verbose_output); + buckets.flush(); + for (auto& pairs_block : pairs_blocks) + pairs_block.close(); + 
num_non_empty_buckets = buckets.num_buckets(); + tfm.remove_all_pairs_files(); + stop = clock_type::now(); + if (config.verbose_output) { + std::cout << " == merge+check took: " << seconds(stop - start) + << " seconds" << std::endl; + std::cout << " == max bucket size = " << int(tfm.max_bucket_size()) + << std::endl; + } + } + auto stop = clock_type::now(); + time.mapping_ordering_seconds = seconds(stop - start); + if (config.verbose_output) { + std::cout << " == map+ordering took " << time.mapping_ordering_seconds + << " seconds" << std::endl; + } + } catch (...) { + tfm.remove_all_pairs_files(); + tfm.remove_all_merge_files(); + throw; + } + + try { + auto start = clock_type::now(); + bit_vector_builder taken(m_table_size); + + { // search + auto buckets_iterator = tfm.buckets_iterator(); + + // write all bucket-pilot pairs to files + uint64_t ram_for_pilots = + ram - bitmap_taken_bytes - hashed_pilots_cache_bytes; + auto pilots = tfm.get_multifile_pairs_writer(num_non_empty_buckets, + ram_for_pilots, 1, 0); + + search(m_num_keys, m_num_buckets, num_non_empty_buckets, m_seed, config, + buckets_iterator, taken, pilots); + + pilots.flush(); + buckets_iterator.close(); + // merge all sorted bucket-pilot pairs on a single file, saving only the + // pilot + pilots_merger_t pilots_merger(tfm.get_pilots_filename(), ram); + merge(tfm.pairs_blocks(), pilots_merger, false); + pilots_merger.finalize_and_close(m_num_buckets); + + if (m_pilots_filename != "") + std::remove(m_pilots_filename.c_str()); + m_pilots_filename = tfm.get_pilots_filename(); + + // remove unused temporary files + tfm.remove_all_pairs_files(); + tfm.remove_all_merge_files(); + } + + if (config.minimal_output) { // fill free slots + // write all free slots to file + buffered_file_t writer(tfm.get_free_slots_filename(), + ram - bitmap_taken_bytes); + fill_free_slots(taken, num_keys, writer); + writer.close(); + if (m_free_slots_filename != "") + std::remove(m_free_slots_filename.c_str()); + 
m_free_slots_filename = tfm.get_free_slots_filename(); + } + + auto stop = clock_type::now(); + time.searching_seconds = seconds(stop - start); + if (config.verbose_output) { + std::cout << " == search took " << time.searching_seconds << " seconds" + << std::endl; + } + } catch (...) { + tfm.remove_all_pairs_files(); + tfm.remove_all_merge_files(); + throw; + } + + return time; + } + + uint64_t seed() const { return m_seed; } + + uint64_t num_keys() const { return m_num_keys; } + + uint64_t table_size() const { return m_table_size; } + + skew_bucketer bucketer() const { return m_bucketer; } + + mm::file_source pilots() const { + return mm::file_source(m_pilots_filename); + } + + mm::file_source free_slots() const { + return mm::file_source(m_free_slots_filename); + } + + private: + uint64_t m_seed; + uint64_t m_num_keys; + uint64_t m_table_size; + uint64_t m_num_buckets; + skew_bucketer m_bucketer; + std::string m_pilots_filename; + std::string m_free_slots_filename; + + template + struct buffer_t { + buffer_t(uint64_t ram) : m_buffer_capacity(ram / sizeof(T)) { + m_buffer.reserve(m_buffer_capacity); + assert(m_buffer_capacity > 0); + } + + template + void emplace_back(_Args&&... 
__args) { + m_buffer.emplace_back(std::forward<_Args>(__args)...); + if (--m_buffer_capacity == 0) + flush(); + } + + void flush() { + if (!m_buffer.empty()) { + uint64_t buffer_size = m_buffer.size(); + flush_impl(m_buffer); + m_buffer_capacity += buffer_size; + m_buffer.clear(); + } + } + + protected: + virtual void flush_impl(std::vector& buffer) = 0; + + private: + uint64_t m_buffer_capacity; + std::vector m_buffer; + }; + + template + struct buffered_file_t : buffer_t { + buffered_file_t(std::string const& filename, uint64_t ram) + : buffer_t(ram) { + m_out.open(filename, std::ofstream::out | std::ofstream::binary); + if (!m_out.is_open()) + throw std::runtime_error("cannot open binary file in write mode"); + } + + void close() { + buffer_t::flush(); + m_out.close(); + } + + protected: + void flush_impl(std::vector& buffer) { + m_out.write(reinterpret_cast(buffer.data()), + buffer.size() * sizeof(T)); + } + + private: + std::ofstream m_out; + }; + + template + struct memory_view { + typedef T* iterator; + typedef const T* const_iterator; + + memory_view() : m_begin(nullptr), m_end(nullptr){}; + memory_view(T* begin, uint64_t size) + : m_begin(begin), m_end(begin + size) {} + + inline T* begin() const { return m_begin; } + inline T* end() const { return m_end; } + inline T& operator[](uint64_t pos) const { return *(m_begin + pos); } + inline uint64_t size() const { return std::distance(m_begin, m_end); } + + protected: + T *m_begin, *m_end; + }; + + template + struct reader_t : memory_view { + void open(std::string const& filename) { + if (m_is.is_open()) + m_is.close(); + m_is.open(filename, mm::advice::sequential); + if (!m_is.is_open()) + throw std::runtime_error("cannot open temporary file (read)"); + memory_view::m_begin = m_is.data(); + memory_view::m_end = m_is.data() + m_is.size(); + } + + void close() { m_is.close(); } + + private: + mm::file_source m_is; + }; + + typedef reader_t pairs_t; + + struct pairs_merger_t { + pairs_merger_t(std::string const& 
filename, uint64_t ram) + : m_buffer(filename, ram) {} + + template + void add(bucket_id_type bucket_id, bucket_size_type bucket_size, + HashIterator hashes) { + for (uint64_t k = 0; k != bucket_size; ++k, ++hashes) { + m_buffer.emplace_back(bucket_id, *hashes); + } + } + + void close() { m_buffer.close(); } + + private: + buffered_file_t m_buffer; + }; + + struct buckets_t { // merger + buckets_t(std::vector const& filenames, uint64_t ram, + std::vector& used_bucket_sizes) + : m_filenames(filenames), + m_buffers(filenames.size()), + m_buffer_capacity(ram / (sizeof(uint64_t) * 2)), + m_ram(ram / (sizeof(uint64_t) * 2)), + m_used_bucket_sizes(used_bucket_sizes), + m_outs(filenames.size()), + m_num_buckets(0) { + assert(m_filenames.size() == m_used_bucket_sizes.size()); + m_non_empty_buckets.reserve(filenames.size()); + for (uint64_t i = 0; i != filenames.size(); ++i) { + if (m_used_bucket_sizes[i]) { + throw std::runtime_error("One of the output files is already open"); + } + } + } + + template + void add(bucket_id_type bucket_id, bucket_size_type bucket_size, + HashIterator hashes) { + assert(bucket_size > 0 and bucket_size <= MAX_BUCKET_SIZE); + ensure_capacity(bucket_size); + uint64_t i = bucket_size - 1; + if (m_buffers[i].empty()) + m_non_empty_buckets.push_back(bucket_size - 1); + m_buffers[i].push_back(bucket_id); + for (uint64_t k = 0; k != bucket_size; ++k, ++hashes) + m_buffers[i].push_back(*hashes); + m_buffer_capacity -= bucket_size + 1; + ++m_num_buckets; + } + + uint64_t num_buckets() const { return m_num_buckets; }; + + void flush() { + for (uint64_t i = 0; i != m_buffers.size(); ++i) + flush_i(i); + m_non_empty_buckets.clear(); + } + + private: + void ensure_capacity(uint64_t bucket_size) { + if (bucket_size + 1 > m_buffer_capacity) { + std::sort(m_non_empty_buckets.begin(), m_non_empty_buckets.end(), + [&](uint64_t i, uint64_t j) { + return m_buffers[i].size() < m_buffers[j].size(); + }); + + uint64_t target = + std::max((uint64_t) std::ceil(0.999 * 
m_ram), bucket_size + 1); + while (m_buffer_capacity < target) { + flush_i(m_non_empty_buckets.back()); + m_non_empty_buckets.pop_back(); + } + } + } + + void flush_i(uint64_t i) { + if (m_buffers[i].size() == 0) + return; + if (!m_used_bucket_sizes[i]) { + m_outs[i].open(m_filenames[i].c_str(), + std::ofstream::out | std::ofstream::binary); + if (!m_outs[i].is_open()) { + throw std::runtime_error("cannot open temporary file (write)"); + } + m_used_bucket_sizes[i] = true; + } + m_outs[i].write(reinterpret_cast(m_buffers[i].data()), + m_buffers[i].size() * sizeof(uint64_t)); + m_buffer_capacity += m_buffers[i].size(); + std::vector().swap(m_buffers[i]); + } + + std::vector m_filenames; + std::vector> m_buffers; + uint64_t m_buffer_capacity; + uint64_t m_ram; + std::vector m_non_empty_buckets; + std::vector& m_used_bucket_sizes; + std::vector m_outs; + uint64_t m_num_buckets; + }; + + struct buckets_iterator_t { + buckets_iterator_t( + std::vector> const& + sizes_filenames) + : m_sizes(sizes_filenames.size()), m_sources(sizes_filenames.size()) { + m_pos = sizes_filenames.size(); + for (uint64_t i = 0, i_end = m_pos; i < i_end; ++i) { + m_sizes[i] = sizes_filenames[i].first; + m_sources[i].open(sizes_filenames[i].second, mm::advice::sequential); + assert(i == 0 or m_sizes[i - 1] < m_sizes[i]); + } + read_next_file(); + } + + void close() { + for (auto& is : m_sources) + is.close(); + } + + inline bucket_t operator*() { + bucket_t bucket; + bucket.init(m_it, m_bucket_size); + return bucket; + } + + void operator++() { + m_it += m_bucket_size + 1; + if (m_it >= m_end) + read_next_file(); + } + + private: + void read_next_file() { + if (m_pos == 0) { + m_it = m_end; + return; + } + --m_pos; + m_bucket_size = m_sizes[m_pos]; + m_it = m_sources[m_pos].data(); + m_end = m_it + m_sources[m_pos].size(); + } + + uint64_t m_pos; + std::vector m_sizes; + std::vector> m_sources; + bucket_size_type m_bucket_size; + uint64_t const* m_it; + uint64_t const* m_end; + }; + + struct 
pilots_merger_t { + pilots_merger_t(std::string const& filename, uint64_t ram) + : m_buffer(filename, ram), m_next_bucket_id(0) {} + + template + void add(bucket_id_type bucket_id, bucket_size_type bucket_size, + HashIterator hashes) { + assert(bucket_size == 1); + (void) bucket_size; // avoid unused warning in release mode + emplace_back_and_fill(bucket_id, *hashes); + } + + void finalize_and_close(uint64_t num_buckets) { + if (m_next_bucket_id < num_buckets) + emplace_back_and_fill(num_buckets - 1, 0); + m_buffer.close(); + } + + private: + inline void emplace_back_and_fill(bucket_id_type bucket_id, + uint64_t pilot) { + assert(m_next_bucket_id <= bucket_id); + + while (m_next_bucket_id++ < bucket_id) { + m_buffer.emplace_back(0); + } + m_buffer.emplace_back(pilot); + } + + buffered_file_t m_buffer; + uint64_t m_next_bucket_id; + }; + + struct multifile_pairs_writer : buffer_t { + multifile_pairs_writer(std::vector const& filenames, + uint64_t& num_pairs_files, uint64_t num_pairs, + uint64_t ram, uint64_t num_threads_sort = 1, + uint64_t ram_parallel_merge = 0) + : buffer_t(get_balanced_ram(num_pairs, ram)), + m_filenames(filenames), + m_num_pairs_files(num_pairs_files), + m_num_threads_sort(num_threads_sort), + m_ram_parallel_merge(ram_parallel_merge) { + assert(num_threads_sort > 1 or ram_parallel_merge == 0); + } + + protected: + void flush_impl(std::vector& buffer) { + const uint64_t size = buffer.size(); + + if (m_num_threads_sort > 1) { // parallel + std::vector> blocks; + uint64_t num_keys_per_thread = + (size + m_num_threads_sort - 1) / m_num_threads_sort; + auto exe = [&](uint64_t tid) { + std::sort(blocks[tid].begin(), blocks[tid].end()); + }; + + std::vector threads(m_num_threads_sort); + for (uint64_t i = 0; i != m_num_threads_sort; ++i) { + auto begin = buffer.data() + i * num_keys_per_thread; + auto end = + buffer.data() + std::min((i + 1) * num_keys_per_thread, size); + uint64_t block_size = std::distance(begin, end); + + blocks.emplace_back(begin, 
block_size); + threads[i] = std::thread(exe, i); + } + for (uint64_t i = 0; i != m_num_threads_sort; ++i) { + if (threads[i].joinable()) + threads[i].join(); + } + pairs_merger_t pairs_merger(m_filenames[m_num_pairs_files], + m_ram_parallel_merge); + ++m_num_pairs_files; + merge(blocks, pairs_merger, false); + pairs_merger.close(); + } else { // sequential + std::ofstream out(m_filenames[m_num_pairs_files], + std::ofstream::out | std::ofstream::binary); + if (!out.is_open()) + throw std::runtime_error("cannot open temporary file (write)"); + ++m_num_pairs_files; + std::sort(buffer.begin(), buffer.end()); + out.write(reinterpret_cast(buffer.data()), + size * sizeof(bucket_payload_pair)); + out.close(); + } + } + + private: + std::vector m_filenames; + uint64_t& m_num_pairs_files; + uint64_t m_num_threads_sort; + uint64_t m_ram_parallel_merge; + + static uint64_t get_balanced_ram(uint64_t num_pairs, uint64_t ram) { + uint64_t num_pairs_per_file = ram / sizeof(bucket_payload_pair); + uint64_t num_temporary_files = + (num_pairs + num_pairs_per_file - 1) / num_pairs_per_file; + uint64_t balanced_num_pairs_per_temporary_file = + (num_pairs + num_temporary_files - 1) / num_temporary_files; + uint64_t balanced_ram = + balanced_num_pairs_per_temporary_file * sizeof(bucket_payload_pair); + assert(balanced_ram <= ram); + + return balanced_ram; + } + }; + + struct temporary_files_manager { + temporary_files_manager(std::string const& dir_name, + uint64_t run_identifier) + : m_dir_name(dir_name), + m_run_identifier(run_identifier), + m_num_pairs_files(0), + m_used_bucket_sizes(MAX_BUCKET_SIZE) { + std::fill(m_used_bucket_sizes.begin(), m_used_bucket_sizes.end(), false); + } + + multifile_pairs_writer get_multifile_pairs_writer( + uint64_t num_pairs, uint64_t ram, uint64_t num_threads_sort = 1, + uint64_t ram_parallel_merge = 0) { + uint64_t num_pairs_per_file = ram / sizeof(bucket_payload_pair); + uint64_t num_temporary_files = + (num_pairs + num_pairs_per_file - 1) / 
num_pairs_per_file; + std::vector filenames; + filenames.reserve(num_temporary_files); + for (uint64_t i = 0; i < num_temporary_files; ++i) { + filenames.emplace_back(get_pairs_filename(m_num_pairs_files + i)); + } + return multifile_pairs_writer(filenames, m_num_pairs_files, num_pairs, + ram, num_threads_sort, ram_parallel_merge); + } + + uint64_t get_num_pairs_files() const { return m_num_pairs_files; } + + void remove_all_pairs_files() { + while (m_num_pairs_files > 0) { + std::remove(get_pairs_filename(--m_num_pairs_files).c_str()); + } + } + + void remove_all_merge_files() { + for (uint64_t i = 0; i != MAX_BUCKET_SIZE; ++i) { + if (m_used_bucket_sizes[i]) { + std::remove(get_buckets_filename(i + 1).c_str()); + m_used_bucket_sizes[i] = false; + } + } + } + + std::vector pairs_blocks() const { + std::vector result(m_num_pairs_files); + for (uint64_t i = 0; i != m_num_pairs_files; ++i) + result[i].open(get_pairs_filename(i)); + return result; + }; + + buckets_t buckets(build_configuration const& config) { + std::vector filenames; + filenames.reserve(MAX_BUCKET_SIZE); + for (uint64_t bucket_size = 1; bucket_size <= MAX_BUCKET_SIZE; + ++bucket_size) { + filenames.emplace_back(get_buckets_filename(bucket_size)); + } + return buckets_t(filenames, config.ram, m_used_bucket_sizes); + } + + buckets_iterator_t buckets_iterator() { + std::vector> sizes_filenames; + for (uint64_t i = 0; i != MAX_BUCKET_SIZE; ++i) { + if (m_used_bucket_sizes[i]) { + uint64_t bucket_size = i + 1; + sizes_filenames.emplace_back(bucket_size, + get_buckets_filename(bucket_size)); + } + } + assert(sizes_filenames.size() > 0); + return buckets_iterator_t(sizes_filenames); + } + + bucket_size_type max_bucket_size() { + bucket_size_type bucket_size = 0; + for (uint64_t i = 0, i_end = m_used_bucket_sizes.size(); i < i_end; ++i) { + if (m_used_bucket_sizes[i]) + bucket_size = i; + } + return bucket_size + 1; + } + + std::string get_pilots_filename() const { + std::stringstream filename; + filename << 
m_dir_name << "/pthash.tmp.run" << m_run_identifier + << ".pilots" + << ".bin"; + return filename.str(); + } + + std::string get_free_slots_filename() const { + std::stringstream filename; + filename << m_dir_name << "/pthash.tmp.run" << m_run_identifier + << ".free_slots" + << ".bin"; + return filename.str(); + } + + private: + std::string get_pairs_filename(uint32_t file_id) const { + std::stringstream filename; + filename << m_dir_name << "/pthash.tmp.run" << m_run_identifier + << ".pairs" << file_id << ".bin"; + return filename.str(); + } + + std::string get_buckets_filename(bucket_size_type bucket_size) const { + std::stringstream filename; + filename << m_dir_name << "/pthash.tmp.run" << m_run_identifier << ".size" + << static_cast(bucket_size) << ".bin"; + return filename.str(); + } + + std::string m_dir_name; + uint64_t m_run_identifier; + uint64_t m_num_pairs_files; + std::vector m_used_bucket_sizes; + }; + + template + void map(Iterator keys, uint64_t num_keys, std::vector& pairs_blocks, + temporary_files_manager& tfm, build_configuration const& config) { + progress_logger logger(num_keys, " == processed ", " keys from input", + config.verbose_output); + + uint64_t ram = config.ram; + uint64_t ram_parallel_merge = 0; + if (config.num_threads > 1) { + ram_parallel_merge = ram * 0.01; + assert(ram_parallel_merge >= + MAX_BUCKET_SIZE * sizeof(bucket_payload_pair)); + } + + auto writer = + tfm.get_multifile_pairs_writer(num_keys, ram - ram_parallel_merge, + config.num_threads, ram_parallel_merge); + try { + for (uint64_t i = 0; i != num_keys; ++i, ++keys) { + auto const& key = *keys; + auto hash = hasher_type::hash(key, m_seed); + bucket_id_type bucket_id = m_bucketer.bucket(hash.first()); + writer.emplace_back(bucket_id, hash.second()); + logger.log(); + } + writer.flush(); + logger.finalize(); + } catch (std::runtime_error const& e) { throw e; } + + auto tmp = tfm.pairs_blocks(); + pairs_blocks.swap(tmp); + } +}; + +} // namespace pthash diff --git 
a/thirdparty/pthash/builders/internal_memory_builder_single_phf.hpp b/thirdparty/pthash/builders/internal_memory_builder_single_phf.hpp new file mode 100644 index 00000000..df9be299 --- /dev/null +++ b/thirdparty/pthash/builders/internal_memory_builder_single_phf.hpp @@ -0,0 +1,365 @@ +/** Copyright 2020-2024 Giulio Ermanno Pibiri and Roberto Trani + * + * The following sets forth attribution notices for third party software. + * + * PTHash: + * The software includes components licensed by Giulio Ermanno Pibiri and + * Roberto Trani, available at https://github.com/jermp/pthash + * + * Licensed under the MIT License (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://opensource.org/licenses/MIT + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "pthash/builders/search.hpp" +#include "pthash/builders/util.hpp" +#include "pthash/utils/bucketers.hpp" +#include "pthash/utils/hasher.hpp" +#include "pthash/utils/logger.hpp" + +namespace pthash { + +template +struct internal_memory_builder_single_phf { + typedef Hasher hasher_type; + + template + build_timings build_from_keys(RandomAccessIterator keys, uint64_t num_keys, + build_configuration const& config) { + if (config.seed == constants::invalid_seed) { + for (auto attempt = 0; attempt < 10; ++attempt) { + m_seed = random_value(); + try { + return build_from_hashes( + hash_generator(keys, m_seed), num_keys, + config); + } catch (seed_runtime_error const& error) { + std::cout << "attempt " << attempt + 1 << " failed" << std::endl; + } + } + throw seed_runtime_error(); + } + m_seed = config.seed; + return build_from_hashes(hash_generator(keys, m_seed), + num_keys, config); + } + + template + build_timings build_from_hashes(RandomAccessIterator hashes, + uint64_t num_keys, + build_configuration const& config) { + assert(num_keys > 1); + if (config.alpha == 0 or config.alpha > 1.0) { + throw std::invalid_argument("load factor must be > 0 and <= 1.0"); + } + + clock_type::time_point start; + + start = clock_type::now(); + + build_timings time; + + uint64_t table_size = static_cast(num_keys) / config.alpha; + if ((table_size & (table_size - 1)) == 0) + table_size += 1; + uint64_t num_buckets = + (config.num_buckets == constants::invalid_num_buckets) + ? 
(std::ceil((config.c * num_keys) / std::log2(num_keys))) + : config.num_buckets; + + m_num_keys = num_keys; + m_table_size = table_size; + m_num_buckets = num_buckets; + m_bucketer.init(m_num_buckets); + + if (config.verbose_output) { + std::cout << "c = " << config.c << std::endl; + std::cout << "alpha = " << config.alpha << std::endl; + std::cout << "num_keys = " << num_keys << std::endl; + std::cout << "table_size = " << table_size << std::endl; + std::cout << "num_buckets = " << num_buckets << std::endl; + } + + buckets_t buckets; + { + auto start = clock_type::now(); + std::vector pairs_blocks; + map(hashes, num_keys, pairs_blocks, config); + auto elapsed = seconds(clock_type::now() - start); + if (config.verbose_output) { + std::cout << " == map+sort took: " << elapsed << " seconds" + << std::endl; + } + + start = clock_type::now(); + merge(pairs_blocks, buckets, config.verbose_output); + elapsed = seconds(clock_type::now() - start); + if (config.verbose_output) { + std::cout << " == merge+check took: " << elapsed << " seconds" + << std::endl; + } + } + auto buckets_iterator = buckets.begin(); + time.mapping_ordering_seconds = seconds(clock_type::now() - start); + if (config.verbose_output) { + std::cout << " == mapping+ordering took " << time.mapping_ordering_seconds + << " seconds " << std::endl; + std::cout << " == max bucket size = " << int((*buckets_iterator).size()) + << std::endl; + } + + start = clock_type::now(); + { + m_pilots.resize(num_buckets); + std::fill(m_pilots.begin(), m_pilots.end(), 0); + bit_vector_builder taken(m_table_size); + uint64_t num_non_empty_buckets = buckets.num_buckets(); + pilots_wrapper_t pilots_wrapper(m_pilots); + search(m_num_keys, m_num_buckets, num_non_empty_buckets, m_seed, config, + buckets_iterator, taken, pilots_wrapper); + if (config.minimal_output) { + m_free_slots.clear(); + m_free_slots.reserve(taken.size() - num_keys); + fill_free_slots(taken, num_keys, m_free_slots); + } + } + time.searching_seconds = 
seconds(clock_type::now() - start); + if (config.verbose_output) { + std::cout << " == search took " << time.searching_seconds << " seconds" + << std::endl; + } + + return time; + } + + uint64_t seed() const { return m_seed; } + + uint64_t num_keys() const { return m_num_keys; } + + uint64_t table_size() const { return m_table_size; } + + skew_bucketer bucketer() const { return m_bucketer; } + + std::vector const& pilots() const { return m_pilots; } + + std::vector const& free_slots() const { return m_free_slots; } + + void swap(internal_memory_builder_single_phf& other) { + std::swap(m_seed, other.m_seed); + std::swap(m_num_keys, other.m_num_keys); + std::swap(m_num_buckets, other.m_num_buckets); + std::swap(m_table_size, other.m_table_size); + std::swap(m_bucketer, other.m_bucketer); + m_pilots.swap(other.m_pilots); + m_free_slots.swap(other.m_free_slots); + } + + template + void visit(Visitor& visitor) { + visitor.visit(m_seed); + visitor.visit(m_num_keys); + visitor.visit(m_num_buckets); + visitor.visit(m_table_size); + visitor.visit(m_bucketer); + visitor.visit(m_pilots); + visitor.visit(m_free_slots); + } + + static size_t estimate_num_bytes_for_construction( + uint64_t num_keys, build_configuration const& config) { + uint64_t table_size = static_cast(num_keys) / config.alpha; + if ((table_size & (table_size - 1)) == 0) + table_size += 1; + uint64_t num_buckets = + (config.num_buckets == constants::invalid_num_buckets) + ? (std::ceil((config.c * num_keys) / std::log2(num_keys))) + : config.num_buckets; + + size_t mapping_bytes = + num_keys * sizeof(bucket_payload_pair) // pairs + + (num_keys + num_buckets) * sizeof(uint64_t); // buckets + + size_t search_bytes = + num_buckets * sizeof(uint64_t) // pilots + + num_buckets * sizeof(uint64_t) // buckets + + (config.minimal_output ? 
(table_size - num_keys) * sizeof(uint64_t) + : 0) // free_slots + + num_keys * sizeof(uint64_t) // hashes + + table_size / 8; // bitmap taken + return std::max(mapping_bytes, search_bytes); + } + + private: + uint64_t m_seed; + uint64_t m_num_keys; + uint64_t m_num_buckets; + uint64_t m_table_size; + skew_bucketer m_bucketer; + std::vector m_pilots; + std::vector m_free_slots; + + template + struct hash_generator { + hash_generator(RandomAccessIterator keys, uint64_t seed) + : m_iterator(keys), m_seed(seed) {} + + inline typename hasher_type::hash_type operator*() { + return hasher_type::hash(*m_iterator, m_seed); + } + + inline void operator++() { ++m_iterator; } + + inline hash_generator operator+(uint64_t offset) const { + return hash_generator(m_iterator + offset, m_seed); + } + + private: + RandomAccessIterator m_iterator; + uint64_t m_seed; + }; + + typedef std::vector pairs_t; + + struct buckets_iterator_t { + buckets_iterator_t(std::vector> const& buffers) + : m_buffers_it(buffers.end() - 1), m_bucket_size(buffers.size()) { + m_bucket.init(m_buffers_it->data(), m_bucket_size); + skip_empty_buckets(); + } + + inline void operator++() { + uint64_t const* begin = m_bucket.begin() + m_bucket_size; + uint64_t const* end = m_buffers_it->data() + m_buffers_it->size(); + m_bucket.init(begin, m_bucket_size); + if ((m_bucket.begin() - 1) == end and m_bucket_size != 0) { + --m_bucket_size; + --m_buffers_it; + skip_empty_buckets(); + } + } + + inline bucket_t operator*() const { return m_bucket; } + + private: + std::vector>::const_iterator m_buffers_it; + bucket_size_type m_bucket_size; + bucket_t m_bucket; + + void skip_empty_buckets() { + while (m_bucket_size != 0 and m_buffers_it->empty()) { + --m_bucket_size; + --m_buffers_it; + } + if (m_bucket_size != 0) + m_bucket.init(m_buffers_it->data(), m_bucket_size); + } + }; + + struct buckets_t { + buckets_t() : m_buffers(MAX_BUCKET_SIZE), m_num_buckets(0) {} + + template + void add(bucket_id_type bucket_id, 
bucket_size_type bucket_size, + HashIterator hashes) { + assert(bucket_size > 0); + uint64_t i = bucket_size - 1; + m_buffers[i].push_back(bucket_id); + for (uint64_t k = 0; k != bucket_size; ++k, ++hashes) + m_buffers[i].push_back(*hashes); + ++m_num_buckets; + } + + uint64_t num_buckets() const { return m_num_buckets; }; + + buckets_iterator_t begin() const { return buckets_iterator_t(m_buffers); } + + private: + std::vector> m_buffers; + uint64_t m_num_buckets; + }; + + struct pilots_wrapper_t { + pilots_wrapper_t(std::vector& pilots) : m_pilots(pilots) {} + + inline void emplace_back(bucket_id_type bucket_id, uint64_t pilot) { + m_pilots[bucket_id] = pilot; + } + + private: + std::vector& m_pilots; + }; + + template + void map_sequential(RandomAccessIterator hashes, uint64_t num_keys, + std::vector& pairs_blocks, + build_configuration const&) const { + pairs_t pairs(num_keys); + RandomAccessIterator begin = hashes; + for (uint64_t i = 0; i != num_keys; ++i, ++begin) { + auto hash = *begin; + auto bucket_id = m_bucketer.bucket(hash.first()); + pairs[i] = {static_cast(bucket_id), hash.second()}; + } + std::sort(pairs.begin(), pairs.end()); + pairs_blocks.resize(1); + pairs_blocks.front().swap(pairs); + } + + template + void map_parallel(RandomAccessIterator hashes, uint64_t num_keys, + std::vector& pairs_blocks, + build_configuration const& config) const { + pairs_blocks.resize(config.num_threads); + uint64_t num_keys_per_thread = + (num_keys + config.num_threads - 1) / config.num_threads; + + auto exe = [&](uint64_t tid) { + auto& local_pairs = pairs_blocks[tid]; + RandomAccessIterator begin = hashes + tid * num_keys_per_thread; + uint64_t local_num_keys = (tid != config.num_threads - 1) + ? 
num_keys_per_thread + : (num_keys - tid * num_keys_per_thread); + local_pairs.resize(local_num_keys); + + for (uint64_t local_i = 0; local_i != local_num_keys; + ++local_i, ++begin) { + auto hash = *begin; + auto bucket_id = m_bucketer.bucket(hash.first()); + local_pairs[local_i] = {static_cast(bucket_id), + hash.second()}; + } + std::sort(local_pairs.begin(), local_pairs.end()); + }; + + std::vector threads(config.num_threads); + for (uint64_t i = 0; i != config.num_threads; ++i) + threads[i] = std::thread(exe, i); + for (auto& t : threads) { + if (t.joinable()) + t.join(); + } + } + + template + void map(RandomAccessIterator hashes, uint64_t num_keys, + std::vector& pairs_blocks, + build_configuration const& config) const { + if (config.num_threads > 1) { + map_parallel(hashes, num_keys, pairs_blocks, config); + } else { + map_sequential(hashes, num_keys, pairs_blocks, config); + } + } +}; + +} // namespace pthash diff --git a/thirdparty/pthash/builders/search.hpp b/thirdparty/pthash/builders/search.hpp new file mode 100644 index 00000000..39244430 --- /dev/null +++ b/thirdparty/pthash/builders/search.hpp @@ -0,0 +1,358 @@ +/** Copyright 2020-2024 Giulio Ermanno Pibiri and Roberto Trani + * + * The following sets forth attribution notices for third party software. + * + * PTHash: + * The software includes components licensed by Giulio Ermanno Pibiri and + * Roberto Trani, available at https://github.com/jermp/pthash + * + * Licensed under the MIT License (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://opensource.org/licenses/MIT + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include // for pow, round, log2 +#include // for stringbuf +#include +#include "pthash/essentials/essentials.hpp" + +#include "pthash/builders/util.hpp" +#include "pthash/encoders/bit_vector.hpp" +#include "pthash/utils/hasher.hpp" + +namespace pthash { + +constexpr uint64_t search_cache_size = 1000; + +struct search_logger { + search_logger(uint64_t num_keys, uint64_t table_size, uint64_t num_buckets) + : m_num_keys(num_keys), + m_table_size(table_size), + m_num_buckets(num_buckets), + m_step(m_num_buckets > 20 ? m_num_buckets / 20 : 1), + m_bucket(0), + m_placed_keys(0), + m_trials(0), + m_total_trials(0), + m_expected_trials(0.0), + m_total_expected_trials(0.0) {} + + void init() { + essentials::logger("search starts"); + m_timer.start(); + } + + /* If X_i is the random variable counting the number of trials + for bucket i, then Pr(X_i <= N - 1) = 1 - (1 - p_i)^N, + where p_i is the success probability for bucket i. + By solving 1 - (1 - p_i)^N >= T wrt N and for a given target + probability T < 1, we obtain N <= log_{1-p_i}(1-T), that is: + we get a pilot <= N with probability T. + Of course, the closer T is to 1, the higher N becomes. + In practice T = 0.65 suffices to have + N > # trials per bucket, for all buckets. 
+ */ + double pilot_wp_T(double T, double p) { + assert(T > 0 and p > 0); + double x = std::log2(1.0 - T) / std::log2(1.0 - p); + return round(x); + } + + void update(uint64_t bucket, uint64_t bucket_size, uint64_t pilot) { + if (bucket > 0) { + double base = + static_cast(m_table_size - m_placed_keys) / m_table_size; + double p = pow(base, bucket_size); + double e = 1.0 / p; + m_expected_trials += e; + m_total_expected_trials += e; + } + + m_placed_keys += bucket_size; + m_trials += pilot + 1; + m_total_trials += pilot + 1; + + if (bucket > 0 and bucket % m_step == 0) + print(bucket); + } + + void finalize(uint64_t bucket) { + m_step = bucket - m_bucket; + print(bucket); + essentials::logger("search ends"); + std::cout << " == " << m_num_buckets - bucket << " empty buckets (" + << ((m_num_buckets - bucket) * 100.0) / m_num_buckets << "%)" + << std::endl; + std::cout << " == total trials = " << m_total_trials << std::endl; + std::cout << " == total expected trials = " + << uint64_t(m_total_expected_trials) << std::endl; + } + + private: + uint64_t m_num_keys; + uint64_t m_table_size; + uint64_t m_num_buckets; + uint64_t m_step; + uint64_t m_bucket; + uint64_t m_placed_keys; + + uint64_t m_trials; + uint64_t m_total_trials; + double m_expected_trials; + double m_total_expected_trials; + + essentials::timer + m_timer; + + void print(uint64_t bucket) { + m_timer.stop(); + std::stringbuf buffer; + std::ostream os(&buffer); + os << m_step << " buckets done in " << m_timer.elapsed() << " seconds (" + << (m_placed_keys * 100.0) / m_num_keys << "% of keys, " + << (bucket * 100.0) / m_num_buckets << "% of buckets, " + << static_cast(m_trials) / m_step << " trials per bucket, " + << m_expected_trials / m_step << " expected trials per bucket)"; + essentials::logger(buffer.str()); + m_bucket = bucket; + m_trials = 0; + m_expected_trials = 0.0; + m_timer.reset(); + m_timer.start(); + } +}; + +template +void search_sequential(uint64_t num_keys, uint64_t num_buckets, + uint64_t 
num_non_empty_buckets, uint64_t seed, + build_configuration const& config, + BucketsIterator& buckets, bit_vector_builder& taken, + PilotsBuffer& pilots) { + uint64_t max_bucket_size = (*buckets).size(); + uint64_t table_size = taken.size(); + std::vector positions; + positions.reserve(max_bucket_size); + __uint128_t M = fastmod::computeM_u64(table_size); + + std::vector hashed_pilots_cache(search_cache_size); + for (uint64_t pilot = 0; pilot != search_cache_size; ++pilot) { + hashed_pilots_cache[pilot] = default_hash64(pilot, seed); + } + + search_logger log(num_keys, table_size, num_buckets); + if (config.verbose_output) + log.init(); + + uint64_t processed_buckets = 0; + for (; processed_buckets < num_non_empty_buckets; + ++processed_buckets, ++buckets) { + auto const& bucket = *buckets; + assert(bucket.size() > 0); + + for (uint64_t pilot = 0; true; ++pilot) { + uint64_t hashed_pilot = PTHASH_LIKELY(pilot < search_cache_size) + ? hashed_pilots_cache[pilot] + : default_hash64(pilot, seed); + + positions.clear(); + + auto bucket_begin = bucket.begin(), bucket_end = bucket.end(); + for (; bucket_begin != bucket_end; ++bucket_begin) { + uint64_t hash = *bucket_begin; + uint64_t p = fastmod::fastmod_u64(hash ^ hashed_pilot, M, table_size); + if (taken.get(p)) + break; + positions.push_back(p); + } + + if (bucket_begin == + bucket_end) { // all keys do not have collisions with taken + + // check for in-bucket collisions + std::sort(positions.begin(), positions.end()); + auto it = std::adjacent_find(positions.begin(), positions.end()); + if (it != positions.end()) + continue; // in-bucket collision detected, try next pilot + + pilots.emplace_back(bucket.id(), pilot); + for (auto p : positions) { + assert(taken.get(p) == false); + taken.set(p, true); + } + if (config.verbose_output) + log.update(processed_buckets, bucket.size(), pilot); + break; + } + } + } + + if (config.verbose_output) + log.finalize(processed_buckets); +} + +template +void search_parallel(uint64_t 
num_keys, uint64_t num_buckets, + uint64_t num_non_empty_buckets, uint64_t seed, + build_configuration const& config, + BucketsIterator& buckets, bit_vector_builder& taken, + PilotsBuffer& pilots) { + uint64_t max_bucket_size = (*buckets).size(); + uint64_t table_size = taken.size(); + __uint128_t M = fastmod::computeM_u64(table_size); + + const uint64_t num_threads = config.num_threads; + std::vector hashed_pilots_cache(search_cache_size); + for (uint64_t pilot = 0; pilot != search_cache_size; ++pilot) { + hashed_pilots_cache[pilot] = default_hash64(pilot, seed); + } + + search_logger log(num_keys, table_size, num_buckets); + if (config.verbose_output) + log.init(); + + volatile uint64_t next_bucket_idx = 0; + + auto exe = [&](uint64_t local_bucket_idx, bucket_t bucket) { + std::vector positions; + positions.reserve(max_bucket_size); + + while (true) { + uint64_t pilot = 0; + bool pilot_checked = false; + + while (true) { + uint64_t local_next_bucket_idx = next_bucket_idx; + + for (; true; ++pilot) { + if (PTHASH_LIKELY(!pilot_checked)) { + uint64_t hashed_pilot = PTHASH_LIKELY(pilot < search_cache_size) + ? 
hashed_pilots_cache[pilot] + : default_hash64(pilot, seed); + + positions.clear(); + + auto bucket_begin = bucket.begin(), bucket_end = bucket.end(); + for (; bucket_begin != bucket_end; ++bucket_begin) { + uint64_t hash = *bucket_begin; + uint64_t p = + fastmod::fastmod_u64(hash ^ hashed_pilot, M, table_size); + if (taken.get(p)) + break; + positions.push_back(p); + } + + if (bucket_begin == bucket_end) { + std::sort(positions.begin(), positions.end()); + auto it = std::adjacent_find(positions.begin(), positions.end()); + if (it != positions.end()) + continue; + + // I can stop the pilot search as there are not collisions + pilot_checked = true; + break; + } + } else { + // I already computed the positions and checked the in-bucket + // collisions I must only check the bitmap again + for (auto p : positions) { + if (taken.get(p)) { + pilot_checked = false; + break; + } + } + // I can stop the pilot search as there are not collisions + if (pilot_checked) + break; + } + } + + // I am the first thread: this is the only condition that can stop the + // loop + if (local_next_bucket_idx == local_bucket_idx) + break; + + // active wait until another thread pushes a change in the bitmap + while (local_next_bucket_idx == next_bucket_idx) + ; + } + assert(local_bucket_idx == next_bucket_idx); + + /* thread-safe from now on */ + + pilots.emplace_back(bucket.id(), pilot); + for (auto p : positions) { + assert(taken.get(p) == false); + taken.set(p, true); + } + if (config.verbose_output) + log.update(local_bucket_idx, bucket.size(), pilot); + + // update (local) local_bucket_idx + local_bucket_idx = next_bucket_idx + num_threads; + + if (local_bucket_idx >= num_non_empty_buckets) { // stop the thread + // update (global) next_bucket_idx, which may unlock other threads + ++next_bucket_idx; + break; + } + + // read the next bucket and advance the iterator + bucket = (*buckets); + ++buckets; + + // update (global) next_bucket_idx, which may unlock other threads + 
++next_bucket_idx; + } + }; + + std::vector threads; + threads.reserve(num_threads); + next_bucket_idx = static_cast( + -1); // avoid that some thread advances the iterator + for (uint64_t i = 0; i != num_threads and i < num_non_empty_buckets; + ++i, ++buckets) { + bucket_t bucket = *buckets; + threads.emplace_back(exe, i, bucket); + } + + next_bucket_idx = 0; // notify the first thread + for (auto& t : threads) { + if (t.joinable()) + t.join(); + } + assert(next_bucket_idx == num_non_empty_buckets); + + if (config.verbose_output) + log.finalize(next_bucket_idx); +} + +template +void search(uint64_t num_keys, uint64_t num_buckets, + uint64_t num_non_empty_buckets, uint64_t seed, + build_configuration const& config, BucketsIterator& buckets, + bit_vector_builder& taken, PilotsBuffer& pilots) { + if (config.num_threads > 1) { + if (config.num_threads > std::thread::hardware_concurrency()) { + throw std::invalid_argument( + "parallel search should use at most " + + std::to_string(std::thread::hardware_concurrency()) + " threads"); + } + search_parallel(num_keys, num_buckets, num_non_empty_buckets, seed, config, + buckets, taken, pilots); + } else { + search_sequential(num_keys, num_buckets, num_non_empty_buckets, seed, + config, buckets, taken, pilots); + } +} + +} // namespace pthash diff --git a/thirdparty/pthash/builders/util.hpp b/thirdparty/pthash/builders/util.hpp new file mode 100644 index 00000000..98ac7c3c --- /dev/null +++ b/thirdparty/pthash/builders/util.hpp @@ -0,0 +1,301 @@ +/** Copyright 2020-2024 Giulio Ermanno Pibiri and Roberto Trani + * + * The following sets forth attribution notices for third party software. + * + * PTHash: + * The software includes components licensed by Giulio Ermanno Pibiri and + * Roberto Trani, available at https://github.com/jermp/pthash + * + * Licensed under the MIT License (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://opensource.org/licenses/MIT + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +#include "pthash/encoders/bit_vector.hpp" +#include "pthash/utils/logger.hpp" +#include "pthash/utils/util.hpp" + +namespace pthash { + +typedef uint32_t bucket_id_type; +typedef uint8_t bucket_size_type; +#define MAX_BUCKET_SIZE static_cast(100) + +static inline std::string get_tmp_builder_filename(std::string const& dir_name, + uint64_t id) { + return dir_name + "/pthash.temp." + std::to_string(id) + ".builder"; +} + +struct build_timings { + build_timings() + : partitioning_seconds(0.0), + mapping_ordering_seconds(0.0), + searching_seconds(0.0), + encoding_seconds(0.0) {} + + double partitioning_seconds; + double mapping_ordering_seconds; + double searching_seconds; + double encoding_seconds; +}; + +struct build_configuration { + build_configuration() + : c(4.5), + alpha(0.98), + num_partitions(1), + num_buckets(constants::invalid_num_buckets), + num_threads(1), + seed(constants::invalid_seed), + ram(static_cast(constants::available_ram) * 0.75), + tmp_dir(constants::default_tmp_dirname), + minimal_output(false), + verbose_output(true) {} + + double c; + double alpha; + uint64_t num_partitions; + uint64_t num_buckets; + uint64_t num_threads; + uint64_t seed; + uint64_t ram; + std::string tmp_dir; + bool minimal_output; + bool verbose_output; +}; + +struct seed_runtime_error : public std::runtime_error { + seed_runtime_error() : std::runtime_error("seed did not work") {} +}; + +#pragma pack(push, 4) +struct bucket_payload_pair { + bucket_id_type bucket_id; + uint64_t payload; + + bucket_payload_pair() {} + 
bucket_payload_pair(bucket_id_type bucket_id, uint64_t payload) + : bucket_id(bucket_id), payload(payload) {} + + bool operator<(bucket_payload_pair const& other) const { + return (bucket_id < other.bucket_id) or + (bucket_id == other.bucket_id and payload < other.payload); + } +}; +#pragma pack(pop) + +struct bucket_t { + bucket_t() : m_begin(nullptr), m_size(0) {} + + void init(uint64_t const* begin, bucket_size_type size) { + m_begin = begin; + m_size = size; + } + + inline bucket_id_type id() const { return *m_begin; } + + inline uint64_t const* begin() const { return m_begin + 1; } + + inline uint64_t const* end() const { return m_begin + 1 + m_size; } + + inline bucket_size_type size() const { return m_size; } + + private: + uint64_t const* m_begin; + bucket_size_type m_size; +}; + +template +struct payload_iterator { + payload_iterator(PairsRandomAccessIterator const& iterator) + : m_iterator(iterator) {} + + uint64_t operator*() const { return (*m_iterator).payload; } + + void operator++() { ++m_iterator; } + + private: + PairsRandomAccessIterator m_iterator; +}; + +template +void merge_single_block(Pairs const& pairs, Merger& merger, bool verbose) { + progress_logger logger(pairs.size(), " == merged ", " pairs", verbose); + + bucket_size_type bucket_size = 1; + uint64_t num_pairs = pairs.size(); + logger.log(); + for (uint64_t i = 1; i != num_pairs; ++i) { + if (pairs[i].bucket_id == pairs[i - 1].bucket_id) { + if (PTHASH_LIKELY(pairs[i].payload != pairs[i - 1].payload)) { + ++bucket_size; + } else { + throw seed_runtime_error(); + } + } else { + merger.add(pairs[i - 1].bucket_id, bucket_size, + payload_iterator( + pairs.begin() + i - bucket_size)); + bucket_size = 1; + } + logger.log(); + } + + // add the last bucket + merger.add(pairs[num_pairs - 1].bucket_id, bucket_size, + payload_iterator(pairs.end() - + bucket_size)); + logger.finalize(); +} + +template +void merge_multiple_blocks(std::vector const& pairs_blocks, + Merger& merger, bool verbose) { + 
uint64_t num_pairs = std::accumulate( + pairs_blocks.begin(), pairs_blocks.end(), static_cast(0), + [](uint64_t sum, Pairs const& pairs) { return sum + pairs.size(); }); + progress_logger logger(num_pairs, " == merged ", " pairs", verbose); + + // input iterators and heap + std::vector iterators; + std::vector idx_heap; + iterators.reserve(pairs_blocks.size()); + idx_heap.reserve(pairs_blocks.size()); + + // heap functions + auto stdheap_idx_comparator = [&](uint32_t idxa, uint32_t idxb) { + return !((*iterators[idxa]) < (*iterators[idxb])); + }; + auto advance_heap_head = [&]() { + auto idx = idx_heap[0]; + ++iterators[idx]; + if (PTHASH_LIKELY(iterators[idx] != pairs_blocks[idx].end())) { + // percolate down the head + uint64_t pos = 0; + uint64_t size = idx_heap.size(); + while (2 * pos + 1 < size) { + uint64_t i = 2 * pos + 1; + if (i + 1 < size and + stdheap_idx_comparator(idx_heap[i], idx_heap[i + 1])) + ++i; + if (stdheap_idx_comparator(idx_heap[i], idx_heap[pos])) + break; + std::swap(idx_heap[pos], idx_heap[i]); + pos = i; + } + } else { + std::pop_heap(idx_heap.begin(), idx_heap.end(), stdheap_idx_comparator); + idx_heap.pop_back(); + } + }; + + // create the input iterators and the heap + for (uint64_t i = 0; i != pairs_blocks.size(); ++i) { + iterators.push_back(pairs_blocks[i].begin()); + idx_heap.push_back(i); + } + std::make_heap(idx_heap.begin(), idx_heap.end(), stdheap_idx_comparator); + + bucket_id_type bucket_id; + std::vector bucket_payloads; + bucket_payloads.reserve(MAX_BUCKET_SIZE); + + // read the first pair + { + bucket_payload_pair pair = (*iterators[idx_heap[0]]); + bucket_id = pair.bucket_id; + bucket_payloads.push_back(pair.payload); + advance_heap_head(); + logger.log(); + } + + // merge + for (uint64_t i = 0; (PTHASH_LIKELY(idx_heap.size())); + ++i, advance_heap_head()) { + bucket_payload_pair pair = (*iterators[idx_heap[0]]); + + if (pair.bucket_id == bucket_id) { + if (PTHASH_LIKELY(pair.payload != bucket_payloads.back())) { + 
bucket_payloads.push_back(pair.payload); + } else { + throw seed_runtime_error(); + } + } else { + merger.add(bucket_id, bucket_payloads.size(), bucket_payloads.begin()); + bucket_id = pair.bucket_id; + bucket_payloads.clear(); + bucket_payloads.push_back(pair.payload); + } + logger.log(); + } + + // add the last bucket + merger.add(bucket_id, bucket_payloads.size(), bucket_payloads.begin()); + logger.finalize(); +} + +template +void merge(std::vector const& pairs_blocks, Merger& merger, + bool verbose) { + if (pairs_blocks.size() == 1) { + merge_single_block(pairs_blocks[0], merger, verbose); + } else { + merge_multiple_blocks(pairs_blocks, merger, verbose); + } +} + +template +void fill_free_slots(bit_vector_builder const& taken, uint64_t num_keys, + FreeSlots& free_slots) { + uint64_t table_size = taken.size(); + if (table_size <= num_keys) + return; + + uint64_t next_used_slot = num_keys; + uint64_t last_free_slot = 0, last_valid_free_slot = 0; + + while (true) { + // find the next free slot (on the left) + while (last_free_slot < num_keys && taken.get(last_free_slot)) + ++last_free_slot; + // exit condition + if (last_free_slot == num_keys) + break; + // fill with the last free slot (on the left) until I find a new used slot + // (on the right) note: since I found a free slot on the left, there must be + // an used slot on the right + assert(next_used_slot < table_size); + while (!taken.get(next_used_slot)) { + free_slots.emplace_back(last_free_slot); + ++next_used_slot; + } + assert(next_used_slot < table_size); + // fill the used slot (on the right) with the last free slot and advance all + // cursors + free_slots.emplace_back(last_free_slot); + last_valid_free_slot = last_free_slot; + ++next_used_slot; + ++last_free_slot; + } + // fill the tail with the last valid slot that I found + while (next_used_slot != table_size) { + free_slots.emplace_back(last_valid_free_slot); + ++next_used_slot; + } + assert(next_used_slot == table_size); +} + +} // namespace 
pthash diff --git a/thirdparty/pthash/encoders/bit_vector.hpp b/thirdparty/pthash/encoders/bit_vector.hpp new file mode 100644 index 00000000..27547a7d --- /dev/null +++ b/thirdparty/pthash/encoders/bit_vector.hpp @@ -0,0 +1,347 @@ +/** Copyright 2020-2024 Giulio Ermanno Pibiri and Roberto Trani + * + * The following sets forth attribution notices for third party software. + * + * PTHash: + * The software includes components licensed by Giulio Ermanno Pibiri and + * Roberto Trani, available at https://github.com/jermp/pthash + * + * Licensed under the MIT License (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://opensource.org/licenses/MIT + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include + +#include "pthash/encoders/util.hpp" +#include "pthash/essentials/essentials.hpp" + +namespace pthash { + +struct bit_vector_builder { + bit_vector_builder(uint64_t size = 0, bool init = 0) : m_size(size) { + m_bits.resize(essentials::words_for(size), uint64_t(-init)); + if (size) { + m_cur_word = &m_bits.back(); + // clear padding bits + if (init && (size & 63)) { + *m_cur_word >>= 64 - (size & 63); + } + } + } + + void reserve(uint64_t num_bits) { + m_bits.reserve(essentials::words_for(num_bits)); + } + + inline void push_back(bool b) { + uint64_t pos_in_word = m_size % 64; + if (pos_in_word == 0) { + m_bits.push_back(0); + m_cur_word = &m_bits.back(); + } + *m_cur_word |= (uint64_t) b << pos_in_word; + ++m_size; + } + + inline void zero_extend(uint64_t n) { + m_size += n; + uint64_t needed = essentials::words_for(m_size) - m_bits.size(); + if (needed) { + m_bits.insert(m_bits.end(), needed, 0); + m_cur_word = &m_bits.back(); + } + } + + inline void set(uint64_t pos, bool b = true) { + assert(pos < size()); + uint64_t word = pos >> 6; + uint64_t pos_in_word = pos & 63; + m_bits[word] &= ~(uint64_t(1) << pos_in_word); + m_bits[word] |= uint64_t(b) << pos_in_word; + } + + inline uint64_t get(uint64_t pos) const { + assert(pos < size()); + uint64_t word = pos >> 6; + uint64_t pos_in_word = pos & 63; + return m_bits[word] >> pos_in_word & uint64_t(1); + } + + inline void set_bits(uint64_t pos, uint64_t bits, size_t len) { + assert(pos + len <= size()); + // check there are no spurious bits + assert(len == 64 || (bits >> len) == 0); + if (!len) + return; + uint64_t mask = (len == 64) ? 
uint64_t(-1) : ((uint64_t(1) << len) - 1); + uint64_t word = pos >> 6; + uint64_t pos_in_word = pos & 63; + + m_bits[word] &= ~(mask << pos_in_word); + m_bits[word] |= bits << pos_in_word; + + uint64_t stored = 64 - pos_in_word; + if (stored < len) { + m_bits[word + 1] &= ~(mask >> stored); + m_bits[word + 1] |= bits >> stored; + } + } + + inline void append_bits(uint64_t bits, size_t len) { + // check there are no spurious bits + assert(len == 64 || (bits >> len) == 0); + if (!len) + return; + uint64_t pos_in_word = m_size & 63; + m_size += len; + if (pos_in_word == 0) { + m_bits.push_back(bits); + } else { + *m_cur_word |= bits << pos_in_word; + if (len > 64 - pos_in_word) { + m_bits.push_back(bits >> (64 - pos_in_word)); + } + } + m_cur_word = &m_bits.back(); + } + + inline uint64_t get_word64(uint64_t pos) const { + assert(pos < size()); + uint64_t block = pos >> 6; + uint64_t shift = pos & 63; + uint64_t word = m_bits[block] >> shift; + if (shift && block + 1 < m_bits.size()) { + word |= m_bits[block + 1] << (64 - shift); + } + return word; + } + + void append(bit_vector_builder const& rhs) { + if (!rhs.size()) + return; + + uint64_t pos = m_bits.size(); + uint64_t shift = size() % 64; + m_size = size() + rhs.size(); + m_bits.resize(essentials::words_for(m_size)); + + if (shift == 0) { // word-aligned, easy case + std::copy(rhs.m_bits.begin(), rhs.m_bits.end(), + m_bits.begin() + ptrdiff_t(pos)); + } else { + uint64_t* cur_word = &m_bits.front() + pos - 1; + for (size_t i = 0; i < rhs.m_bits.size() - 1; ++i) { + uint64_t w = rhs.m_bits[i]; + *cur_word |= w << shift; + *++cur_word = w >> (64 - shift); + } + *cur_word |= rhs.m_bits.back() << shift; + if (cur_word < &m_bits.back()) { + *++cur_word = rhs.m_bits.back() >> (64 - shift); + } + } + m_cur_word = &m_bits.back(); + } + + void resize(uint64_t size) { + m_size = size; + m_bits.resize(essentials::words_for(m_size)); + } + + void swap(bit_vector_builder& other) { + m_bits.swap(other.m_bits); + 
std::swap(m_size, other.m_size); + std::swap(m_cur_word, other.m_cur_word); + } + + std::vector& data() { return m_bits; } + + uint64_t size() const { return m_size; } + + private: + std::vector m_bits; + uint64_t m_size; + uint64_t* m_cur_word; +}; + +struct bit_vector { + bit_vector() : m_size(0) {} + + void build(bit_vector_builder* in) { + m_size = in->size(); + m_bits.swap(in->data()); + } + + bit_vector(bit_vector_builder* in) { build(in); } + + void swap(bit_vector& other) { + std::swap(other.m_size, m_size); + other.m_bits.swap(m_bits); + } + + inline size_t size() const { return m_size; } + + uint64_t bytes() const { + return sizeof(m_size) + essentials::vec_bytes(m_bits); + } + + // get i-th bit + inline uint64_t operator[](uint64_t i) const { + assert(i < size()); + uint64_t block = i >> 6; + uint64_t shift = i & 63; + return m_bits[block] >> shift & uint64_t(1); + } + + inline uint64_t get_bits(uint64_t pos, uint64_t len) const { + assert(pos + len <= size()); + if (!len) + return 0; + uint64_t block = pos >> 6; + uint64_t shift = pos & 63; + uint64_t mask = -(len == 64) | ((1ULL << len) - 1); + if (shift + len <= 64) { + return m_bits[block] >> shift & mask; + } else { + return (m_bits[block] >> shift) | + (m_bits[block + 1] << (64 - shift) & mask); + } + } + + // fast and unsafe version: it retrieves at least 56 bits + inline uint64_t get_word56(uint64_t pos) const { + const char* base_ptr = reinterpret_cast(m_bits.data()); + return *(reinterpret_cast(base_ptr + (pos >> 3))) >> + (pos & 7); + } + + // pad with zeros if extension further size is needed + inline uint64_t get_word64(uint64_t pos) const { + assert(pos < size()); + uint64_t block = pos >> 6; + uint64_t shift = pos & 63; + uint64_t word = m_bits[block] >> shift; + if (shift && block + 1 < m_bits.size()) { + word |= m_bits[block + 1] << (64 - shift); + } + return word; + } + + inline uint64_t predecessor1(uint64_t pos) const { + assert(pos < m_size); + uint64_t block = pos / 64; + uint64_t 
shift = 64 - pos % 64 - 1; + uint64_t word = m_bits[block]; + word = (word << shift) >> shift; + + unsigned long ret; + while (!util::msb(word, ret)) { + assert(block); + word = m_bits[--block]; + }; + return block * 64 + ret; + } + + std::vector const& data() const { return m_bits; } + + struct unary_iterator { + unary_iterator() : m_data(0), m_position(0), m_buf(0) {} + + unary_iterator(bit_vector const& bv, uint64_t pos = 0) { + m_data = bv.data().data(); + m_position = pos; + m_buf = m_data[pos >> 6]; + // clear low bits + m_buf &= uint64_t(-1) << (pos & 63); + } + + uint64_t position() const { return m_position; } + + uint64_t next() { + unsigned long pos_in_word; + uint64_t buf = m_buf; + while (!util::lsb(buf, pos_in_word)) { + m_position += 64; + buf = m_data[m_position >> 6]; + } + + m_buf = buf & (buf - 1); // clear LSB + m_position = (m_position & ~uint64_t(63)) + pos_in_word; + return m_position; + } + + // skip to the k-th one after the current position + void skip(uint64_t k) { + uint64_t skipped = 0; + uint64_t buf = m_buf; + uint64_t w = 0; + while (skipped + (w = util::popcount(buf)) <= k) { + skipped += w; + m_position += 64; + buf = m_data[m_position / 64]; + } + assert(buf); + uint64_t pos_in_word = util::select_in_word(buf, k - skipped); + m_buf = buf & (uint64_t(-1) << pos_in_word); + m_position = (m_position & ~uint64_t(63)) + pos_in_word; + } + + // skip to the k-th zero after the current position + void skip0(uint64_t k) { + uint64_t skipped = 0; + uint64_t pos_in_word = m_position % 64; + uint64_t buf = ~m_buf & (uint64_t(-1) << pos_in_word); + uint64_t w = 0; + while (skipped + (w = util::popcount(buf)) <= k) { + skipped += w; + m_position += 64; + buf = ~m_data[m_position / 64]; + } + assert(buf); + pos_in_word = util::select_in_word(buf, k - skipped); + m_buf = ~buf & (uint64_t(-1) << pos_in_word); + m_position = (m_position & ~uint64_t(63)) + pos_in_word; + } + + private: + uint64_t const* m_data; + uint64_t m_position; + uint64_t 
m_buf; + }; + + template + void visit(Visitor& visitor) { + visitor.visit(m_size); + visitor.visit(m_bits); + } + + template + void load(Loader& loader) { + loader.load(m_size); + loader.load_vec(m_bits); + } + + template + void dump(Dumper& dumper) const { + dumper.dump(m_size); + dumper.dump_vec(m_bits); + } + + protected: + size_t m_size; + std::vector m_bits; +}; + +} // namespace pthash diff --git a/thirdparty/pthash/encoders/compact_vector.hpp b/thirdparty/pthash/encoders/compact_vector.hpp new file mode 100644 index 00000000..b2ec2a69 --- /dev/null +++ b/thirdparty/pthash/encoders/compact_vector.hpp @@ -0,0 +1,306 @@ +/** Copyright 2020-2024 Giulio Ermanno Pibiri and Roberto Trani + * + * The following sets forth attribution notices for third party software. + * + * PTHash: + * The software includes components licensed by Giulio Ermanno Pibiri and + * Roberto Trani, available at https://github.com/jermp/pthash + * + * Licensed under the MIT License (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://opensource.org/licenses/MIT + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include + +namespace pthash { + +struct compact_vector { + template + struct enumerator { + enumerator() {} + + enumerator(Data const* data, uint64_t i = 0) + : m_i(i), + m_cur_val(0), + m_cur_block((i * data->m_width) >> 6), + m_cur_shift((i * data->m_width) & 63), + m_data(data) {} + + uint64_t operator*() { + read(); + return m_cur_val; + } + + enumerator& operator++() { + ++m_i; + return *this; + } + + inline uint64_t value() { + read(); + return m_cur_val; + } + + inline void next() { ++m_i; } + + bool operator==(enumerator const& other) const { return m_i == other.m_i; } + + bool operator!=(enumerator const& other) const { return !(*this == other); } + + private: + uint64_t m_i; + uint64_t m_cur_val; + uint64_t m_cur_block; + int64_t m_cur_shift; + Data const* m_data; + + void read() { + if (m_cur_shift + m_data->m_width <= 64) { + m_cur_val = m_data->m_bits[m_cur_block] >> m_cur_shift & m_data->m_mask; + } else { + uint64_t res_shift = 64 - m_cur_shift; + m_cur_val = + (m_data->m_bits[m_cur_block] >> m_cur_shift) | + (m_data->m_bits[m_cur_block + 1] << res_shift & m_data->m_mask); + ++m_cur_block; + m_cur_shift = -res_shift; + } + + m_cur_shift += m_data->m_width; + + if (m_cur_shift == 64) { + m_cur_shift = 0; + ++m_cur_block; + } + } + }; + + struct builder { + builder() + : m_size(0), + m_width(0), + m_mask(0), + m_back(0), + m_cur_block(0), + m_cur_shift(0) {} + + builder(uint64_t n, uint64_t w) { resize(n, w); } + + void resize(size_t n, uint64_t w) { + m_size = n; + m_width = w; + m_mask = -(w == 64) | ((uint64_t(1) << w) - 1); + m_back = 0; + m_cur_block = 0; + m_cur_shift = 0; + m_bits.resize( + /* use 1 word more for safe access() */ + essentials::words_for(m_size * m_width) + 1, 0); + } + + template + builder(Iterator begin, uint64_t n, uint64_t w) : builder(n, w) { + fill(begin, n); + } + + template + void fill(Iterator begin, uint64_t n) { + if (!m_width) + throw std::runtime_error("width must be greater than 
0"); + for (uint64_t i = 0; i != n; ++i, ++begin) + push_back(*begin); + } + + void set(uint64_t i, uint64_t v) { + assert(m_width); + assert(i < m_size); + if (i == m_size - 1) + m_back = v; + + uint64_t pos = i * m_width; + uint64_t block = pos >> 6; + uint64_t shift = pos & 63; + + m_bits[block] &= ~(m_mask << shift); + m_bits[block] |= v << shift; + + uint64_t res_shift = 64 - shift; + if (res_shift < m_width) { + m_bits[block + 1] &= ~(m_mask >> res_shift); + m_bits[block + 1] |= v >> res_shift; + } + } + + void push_back(uint64_t v) { + assert(m_width); + m_back = v; + m_bits[m_cur_block] &= ~(m_mask << m_cur_shift); + m_bits[m_cur_block] |= v << m_cur_shift; + + uint64_t res_shift = 64 - m_cur_shift; + if (res_shift < m_width) { + ++m_cur_block; + m_bits[m_cur_block] &= ~(m_mask >> res_shift); + m_bits[m_cur_block] |= v >> res_shift; + m_cur_shift = -res_shift; + } + + m_cur_shift += m_width; + + if (m_cur_shift == 64) { + m_cur_shift = 0; + ++m_cur_block; + } + } + + friend struct enumerator; + + typedef enumerator iterator; + + iterator begin() const { return iterator(this); } + + iterator end() const { return iterator(this, size()); } + + void build(compact_vector& cv) { + cv.m_size = m_size; + cv.m_width = m_width; + cv.m_mask = m_mask; + cv.m_bits.swap(m_bits); + builder().swap(*this); + } + + void swap(compact_vector::builder& other) { + std::swap(m_size, other.m_size); + std::swap(m_width, other.m_width); + std::swap(m_mask, other.m_mask); + std::swap(m_cur_block, other.m_cur_block); + std::swap(m_cur_shift, other.m_cur_shift); + m_bits.swap(other.m_bits); + } + + uint64_t back() const { return m_back; } + + uint64_t size() const { return m_size; } + + uint64_t width() const { return m_width; } + + std::vector& bits() { return m_bits; } + + private: + uint64_t m_size; + uint64_t m_width; + uint64_t m_mask; + uint64_t m_back; + uint64_t m_cur_block; + int64_t m_cur_shift; + std::vector m_bits; + }; + + compact_vector() : m_size(0), m_width(0), 
m_mask(0) {} + + template + void build(Iterator begin, uint64_t n) { + assert(n > 0); + uint64_t max = *std::max_element(begin, begin + n); + uint64_t width = max == 0 ? 1 : std::ceil(std::log2(max + 1)); + build(begin, n, width); + } + + template + void build(Iterator begin, uint64_t n, uint64_t w) { + compact_vector::builder builder(begin, n, w); + builder.build(*this); + } + + inline uint64_t operator[](uint64_t i) const { + assert(i < size()); + uint64_t pos = i * m_width; + uint64_t block = pos >> 6; + uint64_t shift = pos & 63; + return shift + m_width <= 64 + ? m_bits[block] >> shift & m_mask + : (m_bits[block] >> shift) | + (m_bits[block + 1] << (64 - shift) & m_mask); + } + + // it retrieves at least 57 bits + inline uint64_t access(uint64_t pos) const { + assert(pos < size()); + uint64_t i = pos * m_width; + const char* ptr = reinterpret_cast(m_bits.data()); + return (*(reinterpret_cast(ptr + (i >> 3))) >> (i & 7)) & + m_mask; + } + + uint64_t back() const { return operator[](size() - 1); } + + inline uint64_t size() const { return m_size; } + + inline uint64_t width() const { return m_width; } + + typedef enumerator iterator; + + iterator begin() const { return iterator(this); } + + iterator end() const { return iterator(this, size()); } + + iterator at(uint64_t pos) const { return iterator(this, pos); } + + std::vector const& bits() const { return m_bits; } + + size_t bytes() const { + return sizeof(m_size) + sizeof(m_width) + sizeof(m_mask) + + essentials::vec_bytes(m_bits); + } + + void swap(compact_vector& other) { + std::swap(m_size, other.m_size); + std::swap(m_width, other.m_width); + std::swap(m_mask, other.m_mask); + m_bits.swap(other.m_bits); + } + + template + void visit(Visitor& visitor) { + visitor.visit(m_size); + visitor.visit(m_width); + visitor.visit(m_mask); + visitor.visit(m_bits); + } + + template + void load(Loader& loader) { + loader.load(m_size); + loader.load(m_width); + loader.load(m_mask); + loader.load_vec(m_bits); + } + + 
template + void dump(Dumper& dumper) const { + dumper.dump(m_size); + dumper.dump(m_width); + dumper.dump(m_mask); + dumper.dump_vec(m_bits); + } + + private: + uint64_t m_size; + uint64_t m_width; + uint64_t m_mask; + std::vector m_bits; +}; + +} // namespace pthash \ No newline at end of file diff --git a/thirdparty/pthash/encoders/darray.hpp b/thirdparty/pthash/encoders/darray.hpp new file mode 100644 index 00000000..48de5991 --- /dev/null +++ b/thirdparty/pthash/encoders/darray.hpp @@ -0,0 +1,185 @@ +/** Copyright 2020-2024 Giulio Ermanno Pibiri and Roberto Trani + * + * The following sets forth attribution notices for third party software. + * + * PTHash: + * The software includes components licensed by Giulio Ermanno Pibiri and + * Roberto Trani, available at https://github.com/jermp/pthash + * + * Licensed under the MIT License (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://opensource.org/licenses/MIT + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "pthash/encoders/bit_vector.hpp" +#include "pthash/encoders/util.hpp" + +namespace pthash { +namespace detail { + +template +struct darray { + darray() : m_positions() {} + + darray(bit_vector const& bv) : m_positions() { + std::vector const& data = bv.data(); + std::vector cur_block_positions; + std::vector block_inventory; + std::vector subblock_inventory; + std::vector overflow_positions; + + for (size_t word_idx = 0; word_idx < data.size(); ++word_idx) { + size_t cur_pos = word_idx << 6; + uint64_t cur_word = WordGetter()(data, word_idx); + unsigned long l; + while (util::lsb(cur_word, l)) { + cur_pos += l; + cur_word >>= l; + if (cur_pos >= bv.size()) + break; + + cur_block_positions.push_back(cur_pos); + + if (cur_block_positions.size() == block_size) { + flush_cur_block(cur_block_positions, block_inventory, + subblock_inventory, overflow_positions); + } + + // can't do >>= l + 1, can be 64 + cur_word >>= 1; + cur_pos += 1; + m_positions += 1; + } + } + if (cur_block_positions.size()) { + flush_cur_block(cur_block_positions, block_inventory, subblock_inventory, + overflow_positions); + } + m_block_inventory.swap(block_inventory); + m_subblock_inventory.swap(subblock_inventory); + m_overflow_positions.swap(overflow_positions); + } + + void swap(darray& other) { + std::swap(other.m_positions, m_positions); + m_block_inventory.swap(other.m_block_inventory); + m_subblock_inventory.swap(other.m_subblock_inventory); + m_overflow_positions.swap(other.m_overflow_positions); + } + + inline uint64_t select(bit_vector const& bv, uint64_t idx) const { + assert(idx < num_positions()); + uint64_t block = idx / block_size; + int64_t block_pos = m_block_inventory[block]; + if (block_pos < 0) { // sparse super-block + uint64_t overflow_pos = uint64_t(-block_pos - 1); + return m_overflow_positions[overflow_pos + (idx & (block_size - 1))]; + } + + size_t subblock = idx / subblock_size; + size_t start_pos = uint64_t(block_pos) + 
m_subblock_inventory[subblock]; + size_t reminder = idx & (subblock_size - 1); + if (!reminder) + return start_pos; + + std::vector const& data = bv.data(); + size_t word_idx = start_pos >> 6; + size_t word_shift = start_pos & 63; + uint64_t word = WordGetter()(data, word_idx) & (uint64_t(-1) << word_shift); + while (true) { + size_t popcnt = util::popcount(word); + if (reminder < popcnt) + break; + reminder -= popcnt; + word = WordGetter()(data, ++word_idx); + } + return (word_idx << 6) + util::select_in_word(word, reminder); + } + + inline uint64_t num_positions() const { return m_positions; } + + uint64_t bytes() const { + return sizeof(m_positions) + essentials::vec_bytes(m_block_inventory) + + essentials::vec_bytes(m_subblock_inventory) + + essentials::vec_bytes(m_overflow_positions); + } + + template + void visit(Visitor& visitor) { + visitor.visit(m_positions); + visitor.visit(m_block_inventory); + visitor.visit(m_subblock_inventory); + visitor.visit(m_overflow_positions); + } + + template + void load(Loader& loader) { + loader.load(m_positions); + loader.load_vec(m_block_inventory); + loader.load_vec(m_subblock_inventory); + loader.load_vec(m_overflow_positions); + } + + template + void dump(Dumper& dumper) const { + dumper.dump(m_positions); + dumper.dump_vec(m_block_inventory); + dumper.dump_vec(m_subblock_inventory); + dumper.dump_vec(m_overflow_positions); + } + + protected: + static void flush_cur_block(std::vector& cur_block_positions, + std::vector& block_inventory, + std::vector& subblock_inventory, + std::vector& overflow_positions) { + if (cur_block_positions.back() - cur_block_positions.front() < + max_in_block_distance) { + block_inventory.push_back(int64_t(cur_block_positions.front())); + for (size_t i = 0; i < cur_block_positions.size(); i += subblock_size) { + subblock_inventory.push_back( + uint16_t(cur_block_positions[i] - cur_block_positions.front())); + } + } else { + block_inventory.push_back(-int64_t(overflow_positions.size()) - 1); + 
for (size_t i = 0; i < cur_block_positions.size(); ++i) { + overflow_positions.push_back(cur_block_positions[i]); + } + for (size_t i = 0; i < cur_block_positions.size(); i += subblock_size) { + subblock_inventory.push_back(uint16_t(-1)); + } + } + cur_block_positions.clear(); + } + + static const size_t block_size = 1024; // 2048 + static const size_t subblock_size = 32; + static const size_t max_in_block_distance = 1 << 16; + + size_t m_positions; + std::vector m_block_inventory; + std::vector m_subblock_inventory; + std::vector m_overflow_positions; +}; + +struct identity_getter { + uint64_t operator()(std::vector const& data, size_t idx) const { + return data[idx]; + } +}; + +} // namespace detail + +typedef detail::darray darray1; + +} // namespace pthash diff --git a/thirdparty/pthash/encoders/ef_sequence.hpp b/thirdparty/pthash/encoders/ef_sequence.hpp new file mode 100644 index 00000000..6c71ca17 --- /dev/null +++ b/thirdparty/pthash/encoders/ef_sequence.hpp @@ -0,0 +1,145 @@ +/** Copyright 2020-2024 Giulio Ermanno Pibiri and Roberto Trani + * + * The following sets forth attribution notices for third party software. + * + * PTHash: + * The software includes components licensed by Giulio Ermanno Pibiri and + * Roberto Trani, available at https://github.com/jermp/pthash + * + * Licensed under the MIT License (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://opensource.org/licenses/MIT + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include "pthash/encoders/bit_vector.hpp" +#include "pthash/encoders/compact_vector.hpp" +#include "pthash/encoders/darray.hpp" + +namespace pthash { + +template +struct ef_sequence { + ef_sequence() {} + + template + void encode(Iterator begin, uint64_t n) { + if (n == 0) + return; + uint64_t u; +#if __cplusplus >= 201703L + if constexpr (encode_prefix_sum) { +#else + if (encode_prefix_sum) { +#endif + u = std::accumulate(begin, begin + n, static_cast(0)); + n = n + 1; // because I will add a zero at the beginning + } else { + u = *(begin + n - 1); + }; + + uint64_t l = uint64_t((n && u / n) ? util::msb(u / n) : 0); + bit_vector_builder bvb_high_bits(n + (u >> l) + 1); + compact_vector::builder cv_builder_low_bits(n, l); + + uint64_t low_mask = (uint64_t(1) << l) - 1; + uint64_t last = 0; + // I add a zero at the beginning +#if __cplusplus >= 201703L + if constexpr (encode_prefix_sum) { +#else + if (encode_prefix_sum) { +#endif + if (l) + cv_builder_low_bits.push_back(0); + bvb_high_bits.set(0, 1); + n = n - 1; // restore n + } + for (size_t i = 0; i < n; ++i, ++begin) { + auto v = *begin; +#if __cplusplus >= 201703L + if constexpr (encode_prefix_sum) { +#else + if (encode_prefix_sum) { +#endif + v = v + last; // prefix sum + } else if (i and v < last) { // check the order + std::cerr << "error at " << i << "/" << n << ":\n"; + std::cerr << "last " << last << "\n"; + std::cerr << "current " << v << "\n"; + throw std::runtime_error("ef_sequence is not sorted"); + } + if (l) + cv_builder_low_bits.push_back(v & low_mask); + bvb_high_bits.set((v >> l) + i + encode_prefix_sum, 1); + last = v; + } + + bit_vector(&bvb_high_bits).swap(m_high_bits); + cv_builder_low_bits.build(m_low_bits); + darray1(m_high_bits).swap(m_high_bits_d1); + } + + inline uint64_t access(uint64_t i) const { + assert(i < size()); + return ((m_high_bits_d1.select(m_high_bits, i) - i) << m_low_bits.width()) | + m_low_bits.access(i); + } + + inline uint64_t diff(uint64_t i) 
const { + assert(i < size() && encode_prefix_sum); + uint64_t low1 = m_low_bits.access(i); + uint64_t low2 = m_low_bits.access(i + 1); + uint64_t l = m_low_bits.width(); + uint64_t pos = m_high_bits_d1.select(m_high_bits, i); + uint64_t h1 = pos - i; + uint64_t h2 = + bit_vector::unary_iterator(m_high_bits, pos + 1).next() - i - 1; + uint64_t val1 = (h1 << l) | low1; + uint64_t val2 = (h2 << l) | low2; + return val2 - val1; + } + + inline uint64_t size() const { return m_low_bits.size(); } + + uint64_t num_bits() const { + return 8 * + (m_high_bits.bytes() + m_high_bits_d1.bytes() + m_low_bits.bytes()); + } + + template + void visit(Visitor& visitor) { + visitor.visit(m_high_bits); + visitor.visit(m_high_bits_d1); + visitor.visit(m_low_bits); + } + + template + void load(Loader& loader) { + m_high_bits.load(loader); + m_high_bits_d1.load(loader); + m_low_bits.load(loader); + } + + template + void dump(Dumper& dumper) const { + m_high_bits.dump(dumper); + m_high_bits_d1.dump(dumper); + m_low_bits.dump(dumper); + } + + private: + bit_vector m_high_bits; + darray1 m_high_bits_d1; + compact_vector m_low_bits; +}; + +} // namespace pthash diff --git a/thirdparty/pthash/encoders/encoders.hpp b/thirdparty/pthash/encoders/encoders.hpp new file mode 100644 index 00000000..422119a5 --- /dev/null +++ b/thirdparty/pthash/encoders/encoders.hpp @@ -0,0 +1,161 @@ +/** Copyright 2020-2024 Giulio Ermanno Pibiri and Roberto Trani + * + * The following sets forth attribution notices for third party software. + * + * PTHash: + * The software includes components licensed by Giulio Ermanno Pibiri and + * Roberto Trani, available at https://github.com/jermp/pthash + * + * Licensed under the MIT License (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://opensource.org/licenses/MIT + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "pthash/essentials/essentials.hpp" + +#include "pthash/encoders/compact_vector.hpp" +#include "pthash/encoders/ef_sequence.hpp" + +#include +#include +#include + +namespace pthash { + +template +std::pair, std::vector> +compute_ranks_and_dictionary(Iterator begin, uint64_t n) { + // accumulate frequencies + std::unordered_map distinct; + for (auto it = begin, end = begin + n; it != end; ++it) { + auto find_it = distinct.find(*it); + if (find_it != distinct.end()) { // found + (*find_it).second += 1; + } else { + distinct[*it] = 1; + } + } + std::vector> vec; + vec.reserve(distinct.size()); + for (auto p : distinct) + vec.emplace_back(p.first, p.second); + std::sort(vec.begin(), vec.end(), + [](const std::pair& x, + const std::pair& y) { + return x.second > y.second; + }); + distinct.clear(); + // assign codewords by non-increasing frequency + std::vector dict; + dict.reserve(distinct.size()); + for (uint64_t i = 0; i != vec.size(); ++i) { + auto p = vec[i]; + distinct.insert({p.first, i}); + dict.push_back(p.first); + } + + std::vector ranks; + ranks.reserve(n); + for (auto it = begin, end = begin + n; it != end; ++it) + ranks.push_back(distinct[*it]); + return {ranks, dict}; +} + +struct dictionary { + template + void encode(Iterator begin, uint64_t n) { + auto pair = compute_ranks_and_dictionary(begin, n); + m_ranks.build(pair.first.begin(), pair.first.size()); + m_dict.build(pair.second.begin(), pair.second.size()); + } + + static std::string name() { return "dictionary"; } + + size_t size() const { return 
m_ranks.size(); } + + size_t num_bits() const { return (m_ranks.bytes() + m_dict.bytes()) * 8; } + + uint64_t access(uint64_t i) const { + uint64_t rank = m_ranks.access(i); + return m_dict.access(rank); + } + + template + void visit(Visitor& visitor) { + visitor.visit(m_ranks); + visitor.visit(m_dict); + } + + template + void load(Loader& loader) { + m_ranks.load(loader); + m_dict.load(loader); + } + + template + void dump(Dumper& dumper) const { + m_ranks.dump(dumper); + m_dict.dump(dumper); + } + + private: + compact_vector m_ranks; + compact_vector m_dict; +}; + +template +struct dual { + template + void encode(Iterator begin, uint64_t n) { + size_t front_size = n * 0.3; + m_front.encode(begin, front_size); + m_back.encode(begin + front_size, n - front_size); + } + + static std::string name() { return Front::name() + "-" + Back::name(); } + + size_t num_bits() const { return m_front.num_bits() + m_back.num_bits(); } + + uint64_t access(uint64_t i) const { + if (i < m_front.size()) + return m_front.access(i); + return m_back.access(i - m_front.size()); + } + + template + void visit(Visitor& visitor) { + visitor.visit(m_front); + visitor.visit(m_back); + } + + template + void load(Loader& loader) { + m_front.load(loader); + m_back.load(loader); + } + + template + void dump(Dumper& dumper) const { + m_front.dump(dumper); + m_back.dump(dumper); + } + + private: + Front m_front; + Back m_back; +}; + +/* dual encoders */ +typedef dual dictionary_dictionary; + +} // namespace pthash diff --git a/thirdparty/pthash/encoders/util.hpp b/thirdparty/pthash/encoders/util.hpp new file mode 100644 index 00000000..6f53b018 --- /dev/null +++ b/thirdparty/pthash/encoders/util.hpp @@ -0,0 +1,114 @@ +/** Copyright 2020-2024 Giulio Ermanno Pibiri and Roberto Trani + * + * The following sets forth attribution notices for third party software. 
+ * + * PTHash: + * The software includes components licensed by Giulio Ermanno Pibiri and + * Roberto Trani, available at https://github.com/jermp/pthash + * + * Licensed under the MIT License (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://opensource.org/licenses/MIT + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +#if defined(__x86_64__) && __SSE4_2__ +#include +#endif + +namespace pthash::util { + +#if defined(__x86_64__) && __SSE4_2__ +template +inline void prefetch(T const* ptr) { + _mm_prefetch(reinterpret_cast(ptr), _MM_HINT_T0); +} +#endif + +inline uint8_t msb(uint64_t x) { + assert(x); + unsigned long ret = -1U; + if (x) { + ret = (unsigned long) (63 - __builtin_clzll(x)); + } + return (uint8_t) ret; +} + +inline bool bsr64(unsigned long* const index, const uint64_t mask) { + if (mask) { + *index = (unsigned long) (63 - __builtin_clzll(mask)); + return true; + } else { + return false; + } +} + +inline uint8_t msb(uint64_t x, unsigned long& ret) { return bsr64(&ret, x); } + +inline uint8_t lsb(uint64_t x, unsigned long& ret) { + if (x) { + ret = (unsigned long) __builtin_ctzll(x); + return true; + } + return false; +} + +inline uint8_t lsb(uint64_t x) { + assert(x); + unsigned long ret = -1U; + lsb(x, ret); + return (uint8_t) ret; +} + +inline uint64_t popcount(uint64_t x) { +#ifdef __SSE4_2__ + return static_cast(_mm_popcnt_u64(x)); +#elif __cplusplus >= 202002L + return std::popcount(x); +#else + return static_cast(__builtin_popcountll(x)); +#endif +} + +inline uint64_t select64_pdep_tzcnt(uint64_t x, const uint64_t k) { +#if 
defined(__x86_64__) && defined(__BMI2__) || defined(__AVX2__) + uint64_t i = 1ULL << k; + asm("pdep %[x], %[mask], %[x]" : [x] "+r"(x) : [mask] "r"(i)); + asm("tzcnt %[bit], %[index]" : [index] "=r"(i) : [bit] "g"(x) : "cc"); + return i; +#else + uint64_t count = 0; + uint64_t result = 0; + + for (uint64_t bit = 0; bit < 64; ++bit) { + if ((x >> bit) & 1) { + if (count == k) { + result = bit; + break; + } + ++count; + } + } + + return result; +#endif +} + +inline uint64_t select_in_word(const uint64_t x, const uint64_t k) { + assert(k < popcount(x)); + return select64_pdep_tzcnt(x, k); +} + +} // namespace pthash::util \ No newline at end of file diff --git a/thirdparty/pthash/essentials/essentials.hpp b/thirdparty/pthash/essentials/essentials.hpp new file mode 100644 index 00000000..aeed8e06 --- /dev/null +++ b/thirdparty/pthash/essentials/essentials.hpp @@ -0,0 +1,644 @@ +/** Copyright 2019-2021 Giulio Ermanno Pibiri + * + * The following sets forth attribution notices for third party software. + * + * C++ Essentials: + * The software includes components licensed by Giulio Ermanno Pibiri and + * Roberto Trani, available at https://github.com/jermp/essentials + * + * Licensed under the MIT License (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://opensource.org/licenses/MIT + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef __GNUG__ +#include // for name demangling +#endif + +namespace essentials { + +inline void logger(std::string const& msg) { + time_t t = std::time(nullptr); + std::locale loc; + const std::time_put& tp = std::use_facet>(loc); + const char* fmt = "%F %T"; + tp.put(std::cout, std::cout, ' ', std::localtime(&t), fmt, fmt + strlen(fmt)); + std::cout << ": " << msg << std::endl; +} + +static const uint64_t GB = 1000 * 1000 * 1000; +static const uint64_t GiB = uint64_t(1) << 30; +static const uint64_t MB = 1000 * 1000; +static const uint64_t MiB = uint64_t(1) << 20; +static const uint64_t KB = 1000; +static const uint64_t KiB = uint64_t(1) << 10; + +inline double convert(size_t bytes, uint64_t unit) { + return static_cast(bytes) / unit; +} + +template +size_t vec_bytes(T const& vec) { + return vec.size() * sizeof(vec.front()) + sizeof(typename T::size_type); +} + +template +size_t pod_bytes(T const& pod) { + static_assert(std::is_pod::value); + return sizeof(pod); +} + +inline size_t file_size(char const* filename) { + std::ifstream is(filename, std::ios::binary | std::ios::ate); + if (!is.good()) { + throw std::runtime_error( + "Error in opening binary " + "file."); + } + size_t bytes = (size_t) is.tellg(); + is.close(); + return bytes; +} + +template +uint64_t words_for(uint64_t bits) { + uint64_t word_bits = sizeof(WordType) * 8; + return (bits + word_bits - 1) / word_bits; +} + +template +inline void do_not_optimize_away(T&& value) { + asm volatile("" : "+r"(value)); +} + +inline uint64_t maxrss_in_bytes() { + struct rusage ru; + if (getrusage(RUSAGE_SELF, &ru) == 0) { + // NOTE: ru_maxrss is in kilobytes on Linux, but not on Apple... 
+#ifdef __APPLE__ + return ru.ru_maxrss; +#endif + return ru.ru_maxrss * 1000; + } + return 0; +} + +template +void load_pod(std::istream& is, T& val) { + static_assert(std::is_pod::value); + is.read(reinterpret_cast(&val), sizeof(T)); +} + +template +void load_vec(std::istream& is, std::vector& vec) { + size_t n; + load_pod(is, n); + vec.resize(n); + is.read(reinterpret_cast(vec.data()), + static_cast(sizeof(T) * n)); +} + +template +void save_pod(std::ostream& os, T const& val) { + static_assert(std::is_pod::value); + os.write(reinterpret_cast(&val), sizeof(T)); +} + +template +void save_vec(std::ostream& os, std::vector const& vec) { + static_assert(std::is_pod::value); + size_t n = vec.size(); + save_pod(os, n); + os.write(reinterpret_cast(vec.data()), + static_cast(sizeof(T) * n)); +} + +template +struct timer { + void start() { m_start = ClockType::now(); } + + void stop() { + m_stop = ClockType::now(); + auto elapsed = std::chrono::duration_cast(m_stop - m_start); + m_timings.push_back(elapsed.count()); + } + + size_t runs() const { return m_timings.size(); } + + void reset() { m_timings.clear(); } + + double min() const { + return *std::min_element(m_timings.begin(), m_timings.end()); + } + + double max() const { + return *std::max_element(m_timings.begin(), m_timings.end()); + } + + void discard_first() { + if (runs()) { + m_timings.erase(m_timings.begin()); + } + } + + void discard_min() { + if (runs() > 1) { + m_timings.erase(std::min_element(m_timings.begin(), m_timings.end())); + } + } + + void discard_max() { + if (runs() > 1) { + m_timings.erase(std::max_element(m_timings.begin(), m_timings.end())); + } + } + + double elapsed() { + return std::accumulate(m_timings.begin(), m_timings.end(), 0.0); + } + + double average() { return elapsed() / runs(); } + + private: + typename ClockType::time_point m_start; + typename ClockType::time_point m_stop; + std::vector m_timings; +}; + +typedef std::chrono::high_resolution_clock clock_type; +typedef 
std::chrono::microseconds duration_type; +typedef timer timer_type; + +inline unsigned get_random_seed() { + return std::chrono::system_clock::now().time_since_epoch().count(); +} + +template +struct uniform_int_rng { + uniform_int_rng(IntType from, IntType to, unsigned seed = 13) + : m_rng(seed), m_distr(from, to) {} + + IntType gen() { return m_distr(m_rng); } + + private: + std::mt19937_64 m_rng; + std::uniform_int_distribution m_distr; +}; + +struct loader { + loader(char const* filename) + : m_num_bytes_pods(0), + m_num_bytes_vecs_of_pods(0), + m_is(filename, std::ios::binary) { + if (!m_is.good()) { + throw std::runtime_error( + "Error in opening binary " + "file."); + } + } + + ~loader() { m_is.close(); } + + template + void visit(T& val) { +#if __cplusplus >= 201703L + if constexpr (std::is_pod::value) { +#else + if (std::is_pod::value) { +#endif + load_pod(m_is, val); + m_num_bytes_pods += pod_bytes(val); + } else { + val.visit(*this); + } + } + + template + void visit(std::vector& vec) { + size_t n; + visit(n); + vec.resize(n); +#if __cplusplus >= 201703L + if constexpr (std::is_pod::value) { +#else + if (std::is_pod::value) { +#endif + m_is.read(reinterpret_cast(vec.data()), + static_cast(sizeof(T) * n)); + m_num_bytes_vecs_of_pods += n * sizeof(T); + } else { + for (auto& v : vec) + visit(v); + } + } + + size_t bytes() { return m_is.tellg(); } + + size_t bytes_pods() { return m_num_bytes_pods; } + + size_t bytes_vecs_of_pods() { return m_num_bytes_vecs_of_pods; } + + private: + size_t m_num_bytes_pods; + size_t m_num_bytes_vecs_of_pods; + std::ifstream m_is; +}; + +struct saver { + saver(char const* filename) : m_os(filename, std::ios::binary) { + if (!m_os.good()) { + throw std::runtime_error( + "Error in opening binary " + "file."); + } + } + + ~saver() { m_os.close(); } + + template + void visit(T& val) { +#if __cplusplus >= 201703L + if constexpr (std::is_pod::value) { +#else + if (std::is_pod::value) { +#endif + save_pod(m_os, val); + } else { + 
val.visit(*this); + } + } + + template + void visit(std::vector& vec) { +#if __cplusplus >= 201703L + if constexpr (std::is_pod::value) { +#else + if (std::is_pod::value) { +#endif + save_vec(m_os, vec); + } else { + size_t n = vec.size(); + visit(n); + for (auto& v : vec) + visit(v); + } + } + + size_t bytes() { return m_os.tellp(); } + + private: + std::ofstream m_os; +}; + +inline std::string demangle(char const* mangled_name) { + size_t len = 0; + int status = 0; + std::unique_ptr ptr( + __cxxabiv1::__cxa_demangle(mangled_name, nullptr, &len, &status), + &std::free); + return ptr.get(); +} + +struct sizer { + sizer(std::string const& root_name = "") + : m_root(0, 0, root_name), m_current(&m_root) {} + + struct node { + node(size_t b, size_t d, std::string const& n = "") + : bytes(b), depth(d), name(n) {} + + size_t bytes; + size_t depth; + std::string name; + std::vector children; + }; + + template + void visit(T& val) { +#if __cplusplus >= 201703L + if constexpr (std::is_pod::value) { +#else + if (std::is_pod::value) { +#endif + node n(pod_bytes(val), m_current->depth + 1, demangle(typeid(T).name())); + m_current->children.push_back(n); + m_current->bytes += n.bytes; + } else { + val.visit(*this); + } + } + + template + void visit(std::vector& vec) { +#if __cplusplus >= 201703L + if constexpr (std::is_pod::value) { +#else + if (std::is_pod::value) { +#endif + node n(vec_bytes(vec), m_current->depth + 1, + demangle(typeid(std::vector).name())); + m_current->children.push_back(n); + m_current->bytes += n.bytes; + } else { + size_t n = vec.size(); + m_current->bytes += pod_bytes(n); + node* parent = m_current; + for (auto& v : vec) { + node n(0, parent->depth + 1, demangle(typeid(T).name())); + parent->children.push_back(n); + m_current = &parent->children.back(); + visit(v); + parent->bytes += m_current->bytes; + } + m_current = parent; + } + } + + template + void print(node const& n, size_t total_bytes, Device& device) const { + auto indent = 
std::string(n.depth * 4, ' '); + device << indent << "'" << n.name << "' - bytes = " << n.bytes << " (" + << n.bytes * 100.0 / total_bytes << "%)" << std::endl; + for (auto const& child : n.children) { + device << indent; + print(child, total_bytes, device); + } + } + + template + void print(Device& device) const { + print(m_root, bytes(), device); + } + + size_t bytes() const { return m_root.bytes; } + + private: + node m_root; + node* m_current; +}; + +template +struct allocator : std::allocator { + typedef T value_type; + + allocator() : m_addr(nullptr) {} + + allocator(T* addr) : m_addr(addr) {} + + T* allocate(size_t n) { + if (m_addr == nullptr) + return std::allocator::allocate(n); + return m_addr; + } + + void deallocate(T* p, size_t n) { + if (m_addr == nullptr) + return std::allocator::deallocate(p, n); + } + + private: + T* m_addr; +}; + +struct contiguous_memory_allocator { + contiguous_memory_allocator() : m_begin(nullptr), m_end(nullptr), m_size(0) {} + + struct visitor { + visitor(uint8_t* begin, size_t size, char const* filename) + : m_begin(begin), + m_end(begin), + m_size(size), + m_is(filename, std::ios::binary) { + if (!m_is.good()) { + throw std::runtime_error( + "Error in opening binary " + "file."); + } + } + + ~visitor() { m_is.close(); } + + template + void visit(T& val) { +#if __cplusplus >= 201703L + if constexpr (std::is_pod::value) { +#else + if (std::is_pod::value) { +#endif + load_pod(m_is, val); + } else { + val.visit(*this); + } + } + + template + void visit(std::vector& vec) { +#if __cplusplus >= 201703L + if constexpr (std::is_pod::value) { +#else + if (std::is_pod::value) { +#endif + vec = std::vector(make_allocator()); + load_vec(m_is, vec); + consume(vec.size() * sizeof(T)); + } else { + size_t n; + visit(n); + vec.resize(n); + for (auto& v : vec) + visit(v); + } + } + + uint8_t* end() { return m_end; } + + size_t size() const { return m_size; } + + size_t allocated() const { + assert(m_end >= m_begin); + return m_end - 
m_begin; + } + + template + allocator make_allocator() { + return allocator(reinterpret_cast(m_end)); + } + + void consume(size_t num_bytes) { + if (m_end == nullptr) + return; + if (allocated() + num_bytes > size()) { + throw std::runtime_error("allocation failed"); + } + m_end += num_bytes; + } + + private: + uint8_t* m_begin; + uint8_t* m_end; + size_t m_size; + std::ifstream m_is; + }; + + template + size_t allocate(T& data_structure, char const* filename) { + loader l(filename); + l.visit(data_structure); + m_size = l.bytes_vecs_of_pods(); + m_begin = reinterpret_cast(malloc(m_size)); + if (m_begin == nullptr) + throw std::runtime_error("malloc failed"); + visitor v(m_begin, m_size, filename); + v.visit(data_structure); + m_end = v.end(); + return l.bytes(); + } + + ~contiguous_memory_allocator() { free(m_begin); } + + uint8_t* begin() { return m_begin; } + + uint8_t* end() { return m_end; } + + size_t size() const { return m_size; } + + private: + uint8_t* m_begin; + uint8_t* m_end; + size_t m_size; +}; + +template +size_t visit(T& data_structure, char const* filename) { + Visitor visitor(filename); + visitor.visit(data_structure); + return visitor.bytes(); +} + +template +size_t load(T& data_structure, char const* filename) { + return visit(data_structure, filename); +} + +template +size_t load_with_custom_memory_allocation(T& data_structure, + char const* filename) { + return data_structure.get_allocator().allocate(data_structure, filename); +} + +template +size_t save(T& data_structure, char const* filename) { + return visit(data_structure, filename); +} + +template +size_t print_size(T& data_structure, Device& device) { + sizer visitor(demangle(typeid(T).name())); + visitor.visit(data_structure); + visitor.print(device); + return visitor.bytes(); +} + +#if defined(__CYGWIN__) || defined(_WIN32) || defined(_WIN64) +#else +struct directory { + struct file_name { + std::string name; + std::string fullpath; + std::string extension; + }; + + ~directory() { + 
for (int i = 0; i != items(); ++i) { + free(m_items_names[i]); + } + free(m_items_names); + } + + directory(std::string const& name) : m_name(name) { + m_n = scandir(m_name.c_str(), &m_items_names, NULL, alphasort); + if (m_n < 0) { + throw std::runtime_error("error during scandir"); + } + } + + std::string const& name() const { return m_name; } + + int items() const { return m_n; } + + struct iterator { + iterator(directory const* d, int i) : m_d(d), m_i(i) {} + + file_name operator*() { + file_name fn; + fn.name = m_d->m_items_names[m_i]->d_name; + fn.fullpath = m_d->name() + "/" + fn.name; + size_t p = fn.name.find_last_of("."); + fn.extension = fn.name.substr(p + 1); + return fn; + } + + void operator++() { ++m_i; } + + bool operator==(iterator const& rhs) const { return m_i == rhs.m_i; } + + bool operator!=(iterator const& rhs) const { return !(*this == rhs); } + + private: + directory const* m_d; + int m_i; + }; + + iterator begin() { return iterator(this, 0); } + + iterator end() { return iterator(this, items()); } + + private: + std::string m_name; + struct dirent** m_items_names; + int m_n; +}; +#endif + +inline bool create_directory(std::string const& name) { + if (mkdir(name.c_str(), 0777) != 0) { + if (errno == EEXIST) { + std::cerr << "directory already exists" << std::endl; + } + return false; + } + return true; +} + +inline bool remove_directory(std::string const& name) { + return rmdir(name.c_str()) == 0; +} + +} // namespace essentials diff --git a/thirdparty/pthash/fastmod/fastmod.h b/thirdparty/pthash/fastmod/fastmod.h new file mode 100644 index 00000000..8ac9743d --- /dev/null +++ b/thirdparty/pthash/fastmod/fastmod.h @@ -0,0 +1,209 @@ +// credits to Daniel Lemire: https://github.com/lemire/fastmod + +#ifndef FASTMOD_H +#define FASTMOD_H + +#ifndef __cplusplus +#include +#include +#else +// In C++ / are irelevant as bool is already a type +#include +#endif + +#ifndef __cplusplus +#define FASTMOD_API static inline +#else +// In C++ we 
mark all the functions inline. +// If C++14 relaxed constexpr is supported we use constexpr so functions +// can be used at compile-time. +#if __cpp_constexpr >= 201304 && !defined(_MSC_VER) +// visual studio does not like constexpr +#define FASTMOD_API constexpr +#define FASTMOD_CONSTEXPR constexpr +#else +#define FASTMOD_API inline +#define FASTMOD_CONSTEXPR +#endif +#endif + +#ifdef _MSC_VER +#include +#endif + +#ifdef __cplusplus +namespace fastmod { +#endif + +#ifdef _MSC_VER + +// __umulh is only available in x64 mode under Visual Studio: don't compile to +// 32-bit! +FASTMOD_API uint64_t mul128_u32(uint64_t lowbits, uint32_t d) { + return __umulh(lowbits, d); +} + +#else // _MSC_VER NOT defined + +FASTMOD_API uint64_t mul128_u32(uint64_t lowbits, uint32_t d) { + return ((__uint128_t) lowbits * d) >> 64; +} + +FASTMOD_API uint64_t mul128_s32(uint64_t lowbits, int32_t d) { + return ((__int128_t) lowbits * d) >> 64; +} + +// This is for the 64-bit functions. +FASTMOD_API uint64_t mul128_u64(__uint128_t lowbits, uint64_t d) { + __uint128_t bottom_half = + (lowbits & UINT64_C(0xFFFFFFFFFFFFFFFF)) * d; // Won't overflow + bottom_half >>= + 64; // Only need the top 64 bits, as we'll shift the lower half away; + __uint128_t top_half = (lowbits >> 64) * d; + __uint128_t both_halves = + bottom_half + top_half; // Both halves are already shifted down by 64 + both_halves >>= 64; // Get top half of both_halves + return (uint64_t) both_halves; +} + +#endif // _MSC_VER + +/** + * Unsigned integers. + * Usage: + * uint32_t d = ... ; // divisor, should be non-zero + * uint64_t M = computeM_u32(d); // do once + * fastmod_u32(a,M,d) is a % d for all 32-bit a. 
+ * + **/ + +// M = ceil( (1<<64) / d ), d > 0 +FASTMOD_API uint64_t computeM_u32(uint32_t d) { + return UINT64_C(0xFFFFFFFFFFFFFFFF) / d + 1; +} + +// fastmod computes (a % d) given precomputed M +FASTMOD_API uint32_t fastmod_u32(uint32_t a, uint64_t M, uint32_t d) { + uint64_t lowbits = M * a; + return (uint32_t) (mul128_u32(lowbits, d)); +} + +// fastmod computes (a / d) given precomputed M for d>1 +FASTMOD_API uint32_t fastdiv_u32(uint32_t a, uint64_t M) { + return (uint32_t) (mul128_u32(M, a)); +} + +// given precomputed M, checks whether n % d == 0 +FASTMOD_API bool is_divisible(uint32_t n, uint64_t M) { return n * M <= M - 1; } + +/** + * signed integers + * Usage: + * int32_t d = ... ; // should be non-zero and between [-2147483647,2147483647] + * int32_t positive_d = d < 0 ? -d : d; // absolute value + * uint64_t M = computeM_s32(d); // do once + * fastmod_s32(a,M,positive_d) is a % d for all 32-bit a. + **/ + +// M = floor( (1<<64) / d ) + 1 +// you must have that d is different from 0 and -2147483648 +// if d = -1 and a = -2147483648, the result is undefined +FASTMOD_API uint64_t computeM_s32(int32_t d) { + if (d < 0) + d = -d; + return UINT64_C(0xFFFFFFFFFFFFFFFF) / d + 1 + ((d & (d - 1)) == 0 ? 1 : 0); +} + +// fastmod computes (a % d) given precomputed M, +// you should pass the absolute value of d +FASTMOD_API int32_t fastmod_s32(int32_t a, uint64_t M, int32_t positive_d) { + uint64_t lowbits = M * a; + int32_t highbits = mul128_u32(lowbits, positive_d); + return highbits - ((positive_d - 1) & (a >> 31)); +} + +#ifndef _MSC_VER +// fastmod computes (a / d) given precomputed M, assumes that d must not +// be one of -1, 1, or -2147483648 +FASTMOD_API int32_t fastdiv_s32(int32_t a, uint64_t M, int32_t d) { + uint64_t highbits = mul128_s32(M, a); + highbits += (a < 0 ? 1 : 0); + if (d < 0) + return -(int32_t) (highbits); + return (int32_t) (highbits); +} + +// What follows is the 64-bit functions. 
+// They are currently not supported on Visual Studio +// due to the lack of a mul128_u64 function. +// They may not be faster than what the compiler +// can produce. + +FASTMOD_API __uint128_t computeM_u64(uint64_t d) { + // what follows is just ((__uint128_t)0 - 1) / d) + 1 spelled out + __uint128_t M = UINT64_C(0xFFFFFFFFFFFFFFFF); + M <<= 64; + M |= UINT64_C(0xFFFFFFFFFFFFFFFF); + M /= d; + M += 1; + return M; +} + +FASTMOD_API __uint128_t computeM_s64(int64_t d) { + if (d < 0) + d = -d; + __uint128_t M = UINT64_C(0xFFFFFFFFFFFFFFFF); + M <<= 64; + M |= UINT64_C(0xFFFFFFFFFFFFFFFF); + M /= d; + M += 1; + M += ((d & (d - 1)) == 0 ? 1 : 0); + return M; +} + +FASTMOD_API uint64_t fastmod_u64(uint64_t a, __uint128_t M, uint64_t d) { + __uint128_t lowbits = M * a; + return mul128_u64(lowbits, d); +} + +FASTMOD_API uint64_t fastdiv_u64(uint64_t a, __uint128_t M) { + return mul128_u64(M, a); +} + +// End of the 64-bit functions + +#endif // #ifndef _MSC_VER + +#ifdef __cplusplus + +template +FASTMOD_API uint32_t fastmod(uint32_t x) { + FASTMOD_CONSTEXPR uint64_t v = computeM_u32(d); + return fastmod_u32(x, v, d); +} +template +FASTMOD_API uint32_t fastdiv(uint32_t x) { + FASTMOD_CONSTEXPR uint64_t v = computeM_u32(d); + return fastdiv_u32(x, v); +} +template +FASTMOD_API int32_t fastmod(int32_t x) { + FASTMOD_CONSTEXPR uint64_t v = computeM_s32(d); + return fastmod_s32(x, v, d); +} +template +FASTMOD_API int32_t fastdiv(int32_t x) { + FASTMOD_CONSTEXPR uint64_t v = computeM_s32(d); + return fastdiv_s32(x, v, d); +} + +} // fastmod +#endif + +// There's no reason to polute the global scope with this macro once its use +// ends This won't create any problems as the preprocessor will have done its +// thing once it reaches this point +#undef FASTMOD_API +#undef FASTMOD_CONSTEXPR + +#endif // FASTMOD_H \ No newline at end of file diff --git a/thirdparty/pthash/mm_file/mm_file.hpp b/thirdparty/pthash/mm_file/mm_file.hpp new file mode 100644 index 00000000..c132a194 --- 
#pragma once

#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>  // close(fd), ftruncate

#include <stdexcept>
#include <string>

namespace mm {

namespace advice {
static const int normal = POSIX_MADV_NORMAL;
static const int random = POSIX_MADV_RANDOM;
static const int sequential = POSIX_MADV_SEQUENTIAL;
}  // namespace advice

// RAII wrapper around a memory-mapped file viewed as an array of T.
// Move-only: the mapping and the file descriptor have exactly one owner.
template <typename T>
struct file {
  file() { init(); }

  // Move transfers ownership of the fd and the mapping; the source is
  // reset to the closed state so its destructor is a no-op.
  file(file&& other) noexcept {
    m_fd = other.m_fd;
    m_size = other.m_size;
    m_data = other.m_data;
    other.init();
  }

  // BUGFIX: destructors must never throw (a throw during stack unwinding
  // calls std::terminate). Failures from munmap during teardown are
  // swallowed here; call close() explicitly to observe them.
  ~file() {
    try {
      close();
    } catch (...) {
      // nothing sensible can be done while destroying the object
    }
  }

  file(file const&) = delete;             // non construction-copyable
  file& operator=(file const&) = delete;  // non copyable

  bool is_open() const { return m_fd != -1; }

  // Unmaps and closes the file. Throws std::runtime_error if munmap fails.
  void close() {
    if (is_open()) {
      if (munmap((char*) m_data, m_size) == -1) {
        throw std::runtime_error("munmap failed when closing file");
      }
      ::close(m_fd);
      init();
    }
  }

  // Size of the mapping in bytes.
  size_t bytes() const { return m_size; }

  // Number of whole T elements in the mapping.
  size_t size() const { return m_size / sizeof(T); }

  T* data() const { return m_data; }

  // Minimal forward iterator over the mapped array.
  struct iterator {
    iterator(T* addr, size_t offset = 0) : m_ptr(addr + offset) {}

    T operator*() { return *m_ptr; }  // yields a copy of the element

    void operator++() { ++m_ptr; }

    bool operator==(iterator const& rhs) const { return m_ptr == rhs.m_ptr; }

    bool operator!=(iterator const& rhs) const { return !((*this) == rhs); }

   private:
    T* m_ptr;
  };

  iterator begin() const { return iterator(m_data); }

  iterator end() const { return iterator(m_data, size()); }

 protected:
  int m_fd;
  size_t m_size;
  T* m_data;

  // Resets to the closed state (does not release resources).
  void init() {
    m_fd = -1;
    m_size = 0;
    m_data = nullptr;
  }

  void check_fd() {
    if (m_fd == -1)
      throw std::runtime_error("cannot open file");
  }
};

// Maps [0, size) bytes of fd with the given protection flags.
// Throws std::runtime_error on failure.
template <typename Pointer>
Pointer mmap(int fd, size_t size, int prot) {
  static const size_t offset = 0;
  Pointer p =
      static_cast<Pointer>(::mmap(NULL, size, prot, MAP_SHARED, fd, offset));
  if (p == MAP_FAILED)
    throw std::runtime_error("mmap failed");
  return p;
}

// Read-only view of an existing file as an array of T const.
template <typename T>
struct file_source : public file<T const> {
  typedef file<T const> base;

  file_source() {}

  file_source(std::string const& path, int adv = advice::normal) {
    open(path, adv);
  }

  void open(std::string const& path, int adv = advice::normal) {
    base::m_fd = ::open(path.c_str(), O_RDONLY);
    base::check_fd();
    struct stat fs;
    if (fstat(base::m_fd, &fs) == -1) {
      throw std::runtime_error("cannot stat file");
    }
    base::m_size = fs.st_size;
    base::m_data = mmap<T const*>(base::m_fd, base::m_size, PROT_READ);
    if (posix_madvise((void*) base::m_data, base::m_size, adv)) {
      throw std::runtime_error("madvise failed");
    }
  }
};

// Read/write view of a file as an array of T; the two-argument open
// creates/truncates the file to hold exactly n elements.
template <typename T>
struct file_sink : public file<T> {
  typedef file<T> base;

  file_sink() {}

  file_sink(std::string const& path) { open(path); }

  file_sink(std::string const& path, size_t n) { open(path, n); }

  void open(std::string const& path) {
    static const mode_t mode = 0600;  // read/write
    base::m_fd = ::open(path.c_str(), O_RDWR, mode);
    base::check_fd();
    struct stat fs;
    if (fstat(base::m_fd, &fs) == -1) {
      throw std::runtime_error("cannot stat file");
    }
    base::m_size = fs.st_size;
    base::m_data =
        mmap<T*>(base::m_fd, base::m_size, PROT_READ | PROT_WRITE);
  }

  void open(std::string const& path, size_t n) {
    static const mode_t mode = 0600;  // read/write
    base::m_fd = ::open(path.c_str(), O_RDWR | O_CREAT | O_TRUNC, mode);
    base::check_fd();
    base::m_size = n * sizeof(T);
    // BUGFIX: the ftruncate result was silently ignored; a failed resize
    // (e.g. disk full) previously led to mapping past EOF and SIGBUS on
    // first write.
    if (ftruncate(base::m_fd, base::m_size) == -1) {
      throw std::runtime_error("ftruncate failed when resizing file");
    }
    base::m_data =
        mmap<T*>(base::m_fd, base::m_size, PROT_READ | PROT_WRITE);
  }
};

}  // namespace mm
+ * + * PTHash: + * The software includes components licensed by Giulio Ermanno Pibiri and + * Roberto Trani, available at https://github.com/jermp/pthash + * + * Licensed under the MIT License (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://opensource.org/licenses/MIT + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "pthash/builders/external_memory_builder_single_phf.hpp" +#include "pthash/builders/internal_memory_builder_single_phf.hpp" +#include "pthash/builders/util.hpp" +#include "pthash/encoders/ef_sequence.hpp" +#include "pthash/utils/bucketers.hpp" + +namespace pthash { + +template +struct single_phf { + typedef Encoder encoder_type; + static constexpr bool minimal = Minimal; + + template + build_timings build_in_internal_memory(Iterator keys, uint64_t n, + build_configuration const& config) { + internal_memory_builder_single_phf builder; + auto timings = builder.build_from_keys(keys, n, config); + timings.encoding_seconds = build(builder, config); + return timings; + } + + template + build_timings build_in_external_memory(Iterator keys, uint64_t n, + build_configuration const& config) { + external_memory_builder_single_phf builder; + auto timings = builder.build_from_keys(keys, n, config); + timings.encoding_seconds = build(builder, config); + return timings; + } + + template + double build(Builder const& builder, build_configuration const&) { + auto start = clock_type::now(); + m_seed = builder.seed(); + m_num_keys = builder.num_keys(); + m_table_size = builder.table_size(); + m_M = fastmod::computeM_u64(m_table_size); + m_bucketer = 
builder.bucketer(); + m_pilots.encode(builder.pilots().data(), m_bucketer.num_buckets()); +#if __cplusplus >= 201703L + if constexpr (Minimal) { +#else + if (Minimal) { +#endif + m_free_slots.encode(builder.free_slots().data(), + m_table_size - m_num_keys); + } + auto stop = clock_type::now(); + return seconds(stop - start); + } + + template + uint64_t operator()(T const& key) const { + auto hash = Hasher::hash(key, m_seed); + return position(hash); + } + + uint64_t position(typename Hasher::hash_type hash) const { + uint64_t bucket = m_bucketer.bucket(hash.first()); + uint64_t pilot = m_pilots.access(bucket); + uint64_t hashed_pilot = default_hash64(pilot, m_seed); + uint64_t p = + fastmod::fastmod_u64(hash.second() ^ hashed_pilot, m_M, m_table_size); +#if __cplusplus >= 201703L + if constexpr (Minimal) { +#else + if (Minimal) { +#endif + if (PTHASH_LIKELY(p < num_keys())) + return p; + return m_free_slots.access(p - num_keys()); + } + return p; + } + + size_t num_bits_for_pilots() const { + return 8 * (sizeof(m_seed) + sizeof(m_num_keys) + sizeof(m_table_size) + + sizeof(m_M)) + + m_bucketer.num_bits() + m_pilots.num_bits(); + } + + size_t num_bits_for_mapper() const { return m_free_slots.num_bits(); } + + size_t num_bits() const { + return num_bits_for_pilots() + num_bits_for_mapper(); + } + + inline uint64_t num_keys() const { return m_num_keys; } + + inline uint64_t table_size() const { return m_table_size; } + + template + void visit(Visitor& visitor) { + visitor.visit(m_seed); + visitor.visit(m_num_keys); + visitor.visit(m_table_size); + visitor.visit(m_M); + visitor.visit(m_bucketer); + visitor.visit(m_pilots); + visitor.visit(m_free_slots); + } + + template + void load(Loader& loader) { + loader.load(m_seed); + loader.load(m_num_keys); + loader.load(m_table_size); + loader.load(m_M); + m_bucketer.load(loader); + m_pilots.load(loader); + m_free_slots.load(loader); + } + + template + void dump(Dumper& dumper) const { + dumper.dump(m_seed); + 
dumper.dump(m_num_keys); + dumper.dump(m_table_size); + dumper.dump(m_M); + m_bucketer.dump(dumper); + m_pilots.dump(dumper); + m_free_slots.dump(dumper); + } + + private: + uint64_t m_seed; + uint64_t m_num_keys; + uint64_t m_table_size; + __uint128_t m_M; + skew_bucketer m_bucketer; + Encoder m_pilots; + ef_sequence m_free_slots; +}; + +} // namespace pthash diff --git a/thirdparty/pthash/utils/bucketers.hpp b/thirdparty/pthash/utils/bucketers.hpp new file mode 100644 index 00000000..3af0ce06 --- /dev/null +++ b/thirdparty/pthash/utils/bucketers.hpp @@ -0,0 +1,92 @@ +/** Copyright 2020-2024 Giulio Ermanno Pibiri and Roberto Trani + * + * The following sets forth attribution notices for third party software. + * + * PTHash: + * The software includes components licensed by Giulio Ermanno Pibiri and + * Roberto Trani, available at https://github.com/jermp/pthash + * + * Licensed under the MIT License (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://opensource.org/licenses/MIT + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "pthash/utils/util.hpp" + +namespace pthash { + +struct skew_bucketer { + skew_bucketer() {} + + void init(uint64_t num_buckets) { + m_num_dense_buckets = 0.3 * num_buckets; + m_num_sparse_buckets = num_buckets - m_num_dense_buckets; + m_M_num_dense_buckets = fastmod::computeM_u64(m_num_dense_buckets); + m_M_num_sparse_buckets = fastmod::computeM_u64(m_num_sparse_buckets); + } + + inline uint64_t bucket(uint64_t hash) const { + static const uint64_t T = UINT64_MAX / 5 * 3; + return (hash < T) ? 
fastmod::fastmod_u64(hash, m_M_num_dense_buckets, + m_num_dense_buckets) + : m_num_dense_buckets + + fastmod::fastmod_u64(hash, m_M_num_sparse_buckets, + m_num_sparse_buckets); + } + + uint64_t num_buckets() const { + return m_num_dense_buckets + m_num_sparse_buckets; + } + + size_t num_bits() const { + return 8 * (sizeof(m_num_dense_buckets) + sizeof(m_num_sparse_buckets) + + sizeof(m_M_num_dense_buckets) + sizeof(m_M_num_sparse_buckets)); + } + + void swap(skew_bucketer& other) { + std::swap(m_num_dense_buckets, other.m_num_dense_buckets); + std::swap(m_num_sparse_buckets, other.m_num_sparse_buckets); + std::swap(m_M_num_dense_buckets, other.m_M_num_dense_buckets); + std::swap(m_M_num_sparse_buckets, other.m_M_num_sparse_buckets); + } + + template + void visit(Visitor& visitor) { + visitor.visit(m_num_dense_buckets); + visitor.visit(m_num_sparse_buckets); + visitor.visit(m_M_num_dense_buckets); + visitor.visit(m_M_num_sparse_buckets); + } + + template + void load(Loader& loader) { + loader.load(m_num_dense_buckets); + loader.load(m_num_sparse_buckets); + loader.load(m_M_num_dense_buckets); + loader.load(m_M_num_sparse_buckets); + } + + template + void dump(Dumper& dumper) const { + dumper.dump(m_num_dense_buckets); + dumper.dump(m_num_sparse_buckets); + dumper.dump(m_M_num_dense_buckets); + dumper.dump(m_M_num_sparse_buckets); + } + + private: + uint64_t m_num_dense_buckets, m_num_sparse_buckets; + __uint128_t m_M_num_dense_buckets, m_M_num_sparse_buckets; +}; + +} // namespace pthash \ No newline at end of file diff --git a/thirdparty/pthash/utils/hasher.hpp b/thirdparty/pthash/utils/hasher.hpp new file mode 100644 index 00000000..9856b3be --- /dev/null +++ b/thirdparty/pthash/utils/hasher.hpp @@ -0,0 +1,188 @@ +/** Copyright 2020-2024 Giulio Ermanno Pibiri and Roberto Trani + * + * The following sets forth attribution notices for third party software. 
#pragma once

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <string>

// See also https://github.com/jermp/bench_hash_functions

namespace pthash {

// Non-owning view over a contiguous range of bytes to be hashed.
struct byte_range {
  uint8_t const* begin;
  uint8_t const* end;
};

/*
    This code is an adaptation from
    https://github.com/aappleby/smhasher/blob/master/src/MurmurHash2.cpp
    by Austin Appleby
*/
inline uint64_t MurmurHash2_64(void const* key, size_t len, uint64_t seed) {
  const uint64_t m = 0xc6a4a7935bd1e995ULL;
  const int r = 47;

  uint64_t h = seed ^ (len * m);

  // BUGFIX/portability: the original read the buffer through a uint64_t*
  // on non-ARM targets, which is undefined behavior for unaligned or
  // aliased input (std::string data has no 8-byte alignment guarantee).
  // memcpy is always well-defined, compiles to the same single load on
  // mainstream compilers, and produces identical hash values.
  const unsigned char* data = static_cast<const unsigned char*>(key);
  const unsigned char* end = data + (len / 8) * sizeof(uint64_t);

  while (data != end) {
    uint64_t k;
    std::memcpy(&k, data, sizeof(k));
    data += sizeof(k);

    k *= m;
    k ^= k >> r;
    k *= m;

    h ^= k;
    h *= m;
  }

  // Tail: the remaining 0..7 bytes, mixed in via deliberate fallthrough.
  const unsigned char* data2 = data;

  switch (len & 7) {
    // fall through
    case 7:
      h ^= uint64_t(data2[6]) << 48;
    // fall through
    case 6:
      h ^= uint64_t(data2[5]) << 40;
    // fall through
    case 5:
      h ^= uint64_t(data2[4]) << 32;
    // fall through
    case 4:
      h ^= uint64_t(data2[3]) << 24;
    // fall through
    case 3:
      h ^= uint64_t(data2[2]) << 16;
    // fall through
    case 2:
      h ^= uint64_t(data2[1]) << 8;
    // fall through
    case 1:
      h ^= uint64_t(data2[0]);
      h *= m;
  };

  h ^= h >> r;
  h *= m;
  h ^= h >> r;

  return h;
}

// Hashes a single 64-bit value with the given seed.
inline uint64_t default_hash64(uint64_t val, uint64_t seed) {
  return MurmurHash2_64(&val, sizeof(uint64_t), seed);
}

// 64-bit hash wrapper; first() and second() both expose the same value.
struct hash64 {
  hash64() {}
  hash64(uint64_t hash) : m_hash(hash) {}

  inline uint64_t first() const { return m_hash; }

  inline uint64_t second() const { return m_hash; }

  inline uint64_t mix() const {
    // From:
    // http://zimbry.blogspot.com/2011/09/better-bit-mixing-improving-on.html
    // 13-th variant
    uint64_t z = m_hash;
    z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9;
    z = (z ^ (z >> 27)) * 0x94d049bb133111eb;
    return z ^ (z >> 31);
  }

 private:
  uint64_t m_hash;
};

// 128-bit hash as two independent 64-bit halves.
struct hash128 {
  hash128() {}
  hash128(uint64_t first, uint64_t second) : m_first(first), m_second(second) {}

  inline uint64_t first() const { return m_first; }

  inline uint64_t second() const { return m_second; }

  inline uint64_t mix() const { return m_first ^ m_second; }

 private:
  uint64_t m_first, m_second;
};

// Hasher policy producing 64-bit hashes.
struct murmurhash2_64 {
  typedef hash64 hash_type;

  // generic range of bytes
  static inline hash64 hash(byte_range range, uint64_t seed) {
    return MurmurHash2_64(range.begin, range.end - range.begin, seed);
  }

  // specialization for std::string
  static inline hash64 hash(std::string const& val, uint64_t seed) {
    return MurmurHash2_64(val.data(), val.size(), seed);
  }

  // specialization for uint64_t
  static inline hash64 hash(uint64_t val, uint64_t seed) {
    return MurmurHash2_64(reinterpret_cast<void const*>(&val), sizeof(val),
                          seed);
  }
};

// Hasher policy producing 128-bit hashes by hashing twice, with seed and
// the bitwise complement of seed.
struct murmurhash2_128 {
  typedef hash128 hash_type;

  // generic range of bytes
  static inline hash128 hash(byte_range range, uint64_t seed) {
    return {MurmurHash2_64(range.begin, range.end - range.begin, seed),
            MurmurHash2_64(range.begin, range.end - range.begin, ~seed)};
  }

  // specialization for std::string
  static inline hash128 hash(std::string const& val, uint64_t seed) {
    return {MurmurHash2_64(val.data(), val.size(), seed),
            MurmurHash2_64(val.data(), val.size(), ~seed)};
  }

  // specialization for uint64_t
  static inline hash128 hash(uint64_t val, uint64_t seed) {
    return {
        MurmurHash2_64(reinterpret_cast<void const*>(&val), sizeof(val), seed),
        MurmurHash2_64(reinterpret_cast<void const*>(&val), sizeof(val),
                       ~seed)};
  }
};

}  // namespace pthash
#pragma once

#include <unistd.h>

#include <cassert>
#include <chrono>
#include <cstdint>
#include <iostream>
#include <random>
#include <string>

// These headers are re-exported for the rest of pthash; they are guarded
// so this header also parses standalone (e.g. under tooling or isolated
// compilation).
#if defined(__has_include)
#if __has_include("pthash/essentials/essentials.hpp")
#include "pthash/essentials/essentials.hpp"
#include "pthash/fastmod/fastmod.h"
#endif
#endif

#define PTHASH_LIKELY(expr) __builtin_expect((bool) (expr), true)

namespace pthash {

typedef std::chrono::high_resolution_clock clock_type;

namespace constants {
static const uint64_t available_ram =
    sysconf(_SC_PAGESIZE) * sysconf(_SC_PHYS_PAGES);
static const uint64_t invalid_seed = uint64_t(-1);
static const uint64_t invalid_num_buckets = uint64_t(-1);
static const std::string default_tmp_dirname(".");
}  // namespace constants

// Returns a pseudo-random 64-bit value seeded from the wall clock
// (NOT deterministic across runs).
inline uint64_t random_value() {
  unsigned seed = std::chrono::system_clock::now().time_since_epoch().count();
  std::mt19937_64 rng(seed);
  return rng();
}

// Converts any std::chrono duration to fractional seconds with
// millisecond resolution.
template <typename DurationType>
double seconds(DurationType const& d) {
  return static_cast<double>(
             std::chrono::duration_cast<std::chrono::milliseconds>(d)
                 .count()) /
         1000;  // better resolution than std::chrono::seconds
}

// Prints coarse-grained progress ("<prefix><perc>%<suffix>") to stdout,
// rewriting the same line via '\r'. With enable == false it only counts
// events and never prints.
struct progress_logger {
  progress_logger(uint64_t total_events, std::string const& prefix = "",
                  std::string const& suffix = "", bool enable = true)
      : m_total_events(total_events),
        m_prefix(prefix),
        m_suffix(suffix),
        m_logged_events(0) {
    // TODO: improve the computation of log_step using timings !
    uint64_t perc_fraction = (total_events >= 100000000) ? 100 : 20;
    m_log_step = (total_events + perc_fraction - 1) / perc_fraction;
    // uint64_t(-1) acts as the "logging disabled" sentinel.
    m_next_event_to_log = static_cast<uint64_t>(-1);
    if (enable) {
      m_next_event_to_log = m_log_step;
      update(false);
    }
  }

  // Records one event; prints when the next logging threshold is crossed.
  inline void log() {
    if (++m_logged_events >= m_next_event_to_log) {
      update(false);
      m_next_event_to_log += m_log_step;
      // the following ensures the last update on 100%
      if (m_next_event_to_log > m_total_events)
        m_next_event_to_log = m_total_events;
    }
  }

  // Emits the final "100%" line (with newline) if logging is enabled.
  void finalize() {
    if (m_next_event_to_log != static_cast<uint64_t>(-1)) {
      assert(m_next_event_to_log == m_total_events);
      assert(m_logged_events == m_total_events);
      update(true);
    }
  }

  uint64_t total_events() const { return m_total_events; }

  uint64_t logged_events() const { return m_logged_events; }

 private:
  inline void update(bool final) const {
    // BUGFIX: guard against total_events == 0, which previously divided
    // by zero (SIGFPE) in this very constructor-triggered update. An
    // empty workload is reported as 100% complete.
    uint64_t perc =
        m_total_events ? (100 * m_logged_events / m_total_events) : 100;
    std::cout << "\r" << m_prefix << perc << "%" << m_suffix;
    if (final) {
      std::cout << std::endl;
    } else {
      std::cout << std::flush;
    }
  }

  const uint64_t m_total_events;
  const std::string m_prefix;
  const std::string m_suffix;
  uint64_t m_logged_events;
  uint64_t m_log_step;
  uint64_t m_next_event_to_log;
};

}  // namespace pthash