[Cartoweb-dev] [Bug 1381] New: Add ability to cache connectivity data to pgdijkstra

bugzilla-daemon at bugzilla.maptools.org bugzilla-daemon at bugzilla.maptools.org
Thu Mar 23 11:24:56 EST 2006


http://bugzilla.maptools.org/show_bug.cgi?id=1381

           Summary: Add ability to cache connectivity data to pgdijkstra
           Product: CartoWeb
           Version: unspecified
          Platform: PC
        OS/Version: All
            Status: NEW
          Severity: enhancement
          Priority: P2
         Component: Plugins
        AssignedTo: cartoweb-dev at lists.maptools.org
        ReportedBy: bytter at gmail.com


When using large connectivity tables (arround 1 million rows), postgresql is too
slow fetching data from that table into pgdijkstra. One solution (prototype
follows) is to cache that same data to disk, and then tell pgdijkstra to use it.
The following patch attempts to implement that, though it lacks heavy reviewing
and some code love (it has 'only' been tested under Windows and PostgreSQL 8.1).
Performance-wise, we've been able to decrease the time from 33seconds on a 700k
routing table down to 3seconds (10x fold increase in performance). Please
consider  this technique (though probably not the code per-se) to be made up-stream:

Index: boost_wrapper.cpp
===================================================================
RCS file: /var/lib/cvs/public/cartoweb3/contrib/pgdijkstra/boost_wrapper.cpp,v
retrieving revision 1.6
diff -u -r1.6 boost_wrapper.cpp
--- boost_wrapper.cpp	17 Oct 2005 07:12:27 -0000	1.6
+++ boost_wrapper.cpp	3 Mar 2006 20:38:34 -0000
@@ -73,6 +73,20 @@
     graph[e].id = id;
 }
 
+int serialize(char* filename, edge_t *edges, unsigned int count) {
+	ofstream myFile (filename, ios::out | ios::binary | ios::trunc);
+    myFile.write ((char*) edges, count * sizeof(edge_t));
+    myFile.close();
+    return 0;
+}
+
+int deserialize(char* filename, edge_t *edges, unsigned int count) {
+	ifstream myFile (filename, ios::in | ios::binary);
+    myFile.read((char*) edges, count * sizeof(edge_t));
+    myFile.close();
+    return 0;
+}
+
 int 
 boost_dijkstra(edge_t *edges, unsigned int count, int source_vertex_id, int
target_vertex_id,
                bool directed, bool has_reverse_cost,
@@ -115,7 +129,7 @@
                                                      edges[j].source, cost);
         }
     }
-
+	
     std::vector<vertex_descriptor> predecessors(num_vertices(graph));
 
     vertex_descriptor source_vertex = vertex(source_vertex_id, graph);
@@ -231,6 +245,7 @@
     int final_edges = NUM_EDGES - 1;
 
     ret = boost_dijkstra(e, NUM_EDGES, 0, final_edges, 1, 0, &path,
&path_count, &err_msg);
+    ret = deserialize(e, NUM_EDGES);
 
     if (ret < 0) 
     {
Index: dijkstra.c
===================================================================
RCS file: /var/lib/cvs/public/cartoweb3/contrib/pgdijkstra/dijkstra.c,v
retrieving revision 1.6
diff -u -r1.6 dijkstra.c
--- dijkstra.c	29 Aug 2005 15:02:38 -0000	1.6
+++ dijkstra.c	3 Mar 2006 20:38:09 -0000
@@ -60,6 +60,8 @@
 // --------------------------------------------------------------------------------
 
 Datum shortest_path(PG_FUNCTION_ARGS);
+Datum fshortest_path(PG_FUNCTION_ARGS);
+Datum serialize_to_file(PG_FUNCTION_ARGS);
 
 #undef DEBUG
 //#define DEBUG 1
@@ -180,6 +182,36 @@
     }
 }
 
+static int compute_shortest_path_from_file(char* filename, int ntuples, int
source_vertex_id, int target_vertex_id, bool directed, bool has_reverse_cost, 
+                                 path_element_t **path, int *path_count) {
+    edge_t *edges = NULL;
+    char *err_msg;
+    int ret = -1;
+
+    DBG("start shortest_path\n");
+       
+    edges = palloc(ntuples * sizeof(edge_t));
+    deserialize(filename, edges, (unsigned int) ntuples);
+        
+    DBG("Calling boost_dijkstra\n");
+        
+    profstop("extract", prof_extract);
+    profstart(prof_dijkstra);
+
+    ret = boost_dijkstra(edges, ntuples, source_vertex_id, target_vertex_id,
+                         directed, has_reverse_cost,
+                         path, path_count, &err_msg);
+
+    profstop("dijkstra", prof_dijkstra);
+    profstart(prof_store);
+
+    if (ret < 0)
+    {
+        elog(ERROR, "Error computing path: %s", err_msg);
+    } 
+      
+    return ret;
+}
 
 static int compute_shortest_path(char* sql, int source_vertex_id, int
target_vertex_id, bool directed, bool has_reverse_cost, 
                                  path_element_t **path, int *path_count) 
@@ -289,6 +321,107 @@
     return ret;
 }
 
+static int calculate_serialize_to_file(char* sql, char* filename, bool
directed, bool has_reverse_cost) 
+{
+
+    int SPIcode;
+    void *SPIplan;
+    Portal SPIportal;
+    bool moredata = TRUE;
+    int ntuples;
+    edge_t *edges = NULL;
+    int total_tuples = 0;
+    edge_columns_t edge_columns = {id: -1, source: -1, target: -1, 
+                                   cost: -1, reverse_cost: -1};
+    char *err_msg;
+    int ret = -1;
+
+    DBG("start shortest_path\n");
+        
+    SPIcode = SPI_connect();
+    if (SPIcode  != SPI_OK_CONNECT)
+    {
+        elog(ERROR, "shortest_path: couldn't open a connection to SPI");
+        return -1;
+    }
+
+    SPIplan = SPI_prepare(sql, 0, NULL);
+    if (SPIplan  == NULL)
+    {
+        elog(ERROR, "shortest_path: couldn't create query plan via SPI");
+        return -1;
+    }
+
+    if ((SPIportal = SPI_cursor_open(NULL, SPIplan, NULL, NULL, true)) == NULL) 
+    {
+        elog(ERROR, "shortest_path: SPI_cursor_open('%s') returns NULL", sql);
+        return -1;
+    }
+
+    while (moredata == TRUE)
+    {
+        SPI_cursor_fetch(SPIportal, TRUE, TUPLIMIT);
+
+        if (edge_columns.id == -1) 
+        {
+            if (fetch_edge_columns(SPI_tuptable, &edge_columns,
has_reverse_cost) == -1)
+                goto out;
+        }
+
+        ntuples = SPI_processed;
+        total_tuples += ntuples;
+        if (!edges)
+            edges = palloc(total_tuples * sizeof(edge_t));
+        else
+            edges = repalloc(edges, total_tuples * sizeof(edge_t));
+
+        if (edges == NULL) 
+        {
+            elog(ERROR, "Out of memory");
+            goto out;
+        }
+
+        if (ntuples > 0) 
+        {
+            int t;
+            SPITupleTable *tuptable = SPI_tuptable;
+            TupleDesc tupdesc = SPI_tuptable->tupdesc;
+                
+            for (t = 0; t < ntuples; t++) 
+            {
+                HeapTuple tuple = tuptable->vals[t];
+                fetch_edge(&tuple, &tupdesc, &edge_columns, &edges[total_tuples
- ntuples + t]);
+            }
+            SPI_freetuptable(tuptable);
+        } 
+        else 
+        {
+            moredata = FALSE;
+        }
+    }
+
+    ret = serialize(filename, edges, total_tuples);
+        
+out:
+    SPIcode = SPI_finish();
+    if (SPIcode  != SPI_OK_FINISH )
+    {
+        elog(ERROR,"couldn't disconnect from SPI");
+        return -1 ;
+    }
+    
+    return ret;
+}
+
+PG_FUNCTION_INFO_V1(serialize_to_file);
+Datum
+serialize_to_file(PG_FUNCTION_ARGS)
+{
+    calculate_serialize_to_file(text2char(PG_GETARG_TEXT_P(0)),
text2char(PG_GETARG_TEXT_P(1)),
+									  PG_GETARG_BOOL(2), PG_GETARG_BOOL(3));
+
+    PG_RETURN_BOOL(true);
+}
 
 PG_FUNCTION_INFO_V1(shortest_path);
 Datum
@@ -395,3 +528,109 @@
         SRF_RETURN_DONE(funcctx);
     }
 }
+
+PG_FUNCTION_INFO_V1(fshortest_path);
+Datum
+fshortest_path(PG_FUNCTION_ARGS)
+{
+    FuncCallContext     *funcctx;
+    int                  call_cntr;
+    int                  max_calls;
+    TupleDesc            tuple_desc;
+    path_element_t      *path;
+
+    /* stuff done only on the first call of the function */
+    if (SRF_IS_FIRSTCALL())
+    {
+        MemoryContext   oldcontext;
+        int path_count;
+        int ret;
+
+        // XXX profiling messages are not thread safe
+        profstart(prof_total);
+        profstart(prof_extract);
+
+        /* create a function context for cross-call persistence */
+        funcctx = SRF_FIRSTCALL_INIT();
+
+        /* switch to memory context appropriate for multiple function calls */
+        oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+
+        ret = compute_shortest_path_from_file(text2char(PG_GETARG_TEXT_P(0)),
PG_GETARG_INT32(1),
+                                    PG_GETARG_INT32(2),
+                                    PG_GETARG_INT32(3),
+                                    PG_GETARG_BOOL(4),
+                                    PG_GETARG_BOOL(5), &path, &path_count);
+
+#ifdef DEBUG
+        DBG("Ret is %i", ret);
+        if (ret >= 0) 
+        {
+            int i;
+            for (i = 0; i < path_count; i++) 
+            {
+                DBG("Step %i vertex_id  %i ", i, path[i].vertex_id);
+                DBG("        edge_id    %i ", path[i].edge_id);
+                DBG("        cost       %f ", path[i].cost);
+            }
+        }
+#endif
+
+        /* total number of tuples to be returned */
+        funcctx->max_calls = path_count;
+        funcctx->user_fctx = path;
+
+        funcctx->tuple_desc =
BlessTupleDesc(RelationNameGetTupleDesc("path_result"));
+
+        MemoryContextSwitchTo(oldcontext);
+    }
+
+    /* stuff done on every call of the function */
+    funcctx = SRF_PERCALL_SETUP();
+
+    call_cntr = funcctx->call_cntr;
+    max_calls = funcctx->max_calls;
+    tuple_desc = funcctx->tuple_desc;
+    path = (path_element_t*) funcctx->user_fctx;
+
+    if (call_cntr < max_calls)    /* do when there is more left to send */
+    {
+        HeapTuple    tuple;
+        Datum        result;
+        Datum *values;
+        char* nulls;
+
+        values = palloc(4 * sizeof(Datum));
+        nulls = palloc(4 * sizeof(char));
+
+        values[0] = Int32GetDatum(call_cntr);
+        nulls[0] = ' ';
+        values[1] = Int32GetDatum(path[call_cntr].vertex_id);
+        nulls[1] = ' ';
+        values[2] = Int32GetDatum(path[call_cntr].edge_id);
+        nulls[2] = ' ';
+        values[3] = Float8GetDatum(path[call_cntr].cost);
+        nulls[3] = ' ';
+
+        tuple = heap_formtuple(tuple_desc, values, nulls);
+
+        /* make the tuple into a datum */
+        result = HeapTupleGetDatum(tuple);
+
+        /* clean up (this is not really necessary) */
+        pfree(values);
+        pfree(nulls);
+
+        SRF_RETURN_NEXT(funcctx, result);
+    }
+    else    /* do when there is no more left */
+    {
+        profstop("store", prof_store);
+        profstop("total", prof_total);
+#ifdef PROFILE
+        elog(NOTICE, "_________");
+#endif
+        SRF_RETURN_DONE(funcctx);
+    }
+}
Index: dijkstra.h
===================================================================
RCS file: /var/lib/cvs/public/cartoweb3/contrib/pgdijkstra/dijkstra.h,v
retrieving revision 1.4
diff -u -r1.4 dijkstra.h
--- dijkstra.h	29 Aug 2005 15:02:38 -0000	1.4
+++ dijkstra.h	3 Mar 2006 20:26:02 -0000
@@ -46,5 +46,12 @@
 int boost_dijkstra(edge_t *edges, unsigned int count, int source_vertex_id, int
target_vertex_id,
 		   bool directed, bool has_reverse_cost,
                    path_element_t **path, int *path_count, char **err_msg);
-
+#ifdef __cplusplus
+extern "C"
+#endif
+int serialize(char* filename, edge_t *edges, unsigned int count);
+#ifdef __cplusplus
+extern "C"
+#endif
+int deserialize(char* filename, edge_t *edges, unsigned int count);
 #endif
Index: dijkstra.sql.in
===================================================================
RCS file: /var/lib/cvs/public/cartoweb3/contrib/pgdijkstra/dijkstra.sql.in,v
retrieving revision 1.5
diff -u -r1.5 dijkstra.sql.in
--- dijkstra.sql.in	12 Oct 2005 08:43:54 -0000	1.5
+++ dijkstra.sql.in	23 Mar 2006 16:17:13 -0000
@@ -31,6 +31,25 @@
         AS 'MODULE_PATHNAME'
         LANGUAGE 'C' IMMUTABLE STRICT;
 
+-----------------------------------------------------------------------
+-- Core function for shortest_path computation using cached data
+-----------------------------------------------------------------------
+
+CREATE OR REPLACE FUNCTION fshortest_path(filename text, count int4, source_id
int4, 
+					  target_id int4, directed bool, has_reverse_cost bool)
+	RETURNS SETOF path_result 
+	AS 'MODULE_PATHNAME'
+	LANGUAGE 'C' IMMUTABLE STRICT;
+
+-----------------------------------------------------------------------
+-- Create cached data for fshortest_path function
+-----------------------------------------------------------------------
+
+CREATE OR REPLACE FUNCTION serialize_to_file(sql text, filename text, directed
bool, 
+					     has_reverse_cost bool)
+	RETURNS bool 
+	AS 'MODULE_PATHNAME'
+	LANGUAGE 'c' VOLATILE;
 
 -----------------------------------------------------------------------
 -- Drops the vertices and edges tables related to the given geom_table



------- You are receiving this mail because: -------
You are the assignee for the bug, or are watching the assignee.


Please do NOT reply to this email, use the link above instead to 
login to bugzilla and submit your comment. Any email reply to this
address will be lost.


More information about the Cartoweb-dev mailing list