Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GeoJSON: expose field names in a more consistent orders when some feature lacks some fields (refs qgis/QGIS#45139) #4552

Merged
merged 2 commits into from
Sep 27, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 117 additions & 0 deletions autotest/ogr/data/geojson/sparse_fields.geojson
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
{
"type": "FeatureCollection",
"features": [
{
"type": "Feature",
"geometry": null,
"properties": {
"B": "b"
}
},
{
"type": "Feature",
"geometry": null,
"properties": {
"C": "c"
}
},
{
"type": "Feature",
"geometry": null,
"properties": {
"C": "c",
"B": "b"
}
},
{
"type": "Feature",
"geometry": null,
"properties": {
"C": "c",
"A": "a"
}
},
{
"type": "Feature",
"geometry": null,
"properties": {
"B": "a",
"A": "c"
}
},
{
"type": "Feature",
"geometry": null,
"properties": {
"X": "isolated_node"
}
},
{
"type": "Feature",
"geometry": null,
"#comment": "attempt at creating direct cycle",
"properties": {
"A": "c",
"C": "a"
}
},
{
"type": "Feature",
"geometry": null,
"#comment": "attempt at creating indirect cycle",
"properties": {
"A": "c",
"B": "b",
"C": "a"
}
},
{
"type": "Feature",
"geometry": null,
"properties": {
"A": "a",
"D": "d"
}
},
{
"type": "Feature",
"geometry": null,
"properties": {
"A": "a",
"E": "e"
}
},
{
"type": "Feature",
"geometry": null,
"properties": {
"E": "e",
"E_next": "e_next"
}
},
{
"type": "Feature",
"geometry": null,
"properties": {
"E_prev": "e_prev",
"E": "e"
}
},
{
"type": "Feature",
"geometry": null,
"properties": {
"E": "e",
"F": "f"
}
},
{
"type": "Feature",
"geometry": null,
"properties": {
"D": "d",
"F": "f"
}
}
]
}
12 changes: 11 additions & 1 deletion autotest/ogr/ogr_geojson.py
Original file line number Diff line number Diff line change
Expand Up @@ -1149,7 +1149,7 @@ def test_ogr_geojson_39():
] }""")
lyr = ds.GetLayer(0)
feat_defn = lyr.GetLayerDefn()
assert feat_defn.GetFieldDefn(1).GetName() == 'id' and feat_defn.GetFieldDefn(1).GetType() == ogr.OFTInteger
assert feat_defn.GetFieldDefn(0).GetName() == 'id' and feat_defn.GetFieldDefn(0).GetType() == ogr.OFTInteger
feat = lyr.GetNextFeature()
if feat.GetField('id') != 1:
feat.DumpReadable()
Expand Down Expand Up @@ -3163,3 +3163,13 @@ def test_ogr_geojson_export_geometry_axis_order():
# No CRS
g = ogr.CreateGeometryFromWkt('POINT (2 49)')
assert json.loads(g.ExportToJson()) == { "type": "Point", "coordinates": [ 2.0, 49.0 ] }

###############################################################################

def test_ogr_geojson_sparse_fields():

ds = ogr.Open('data/geojson/sparse_fields.geojson')
lyr = ds.GetLayer(0)
lyr_defn = lyr.GetLayerDefn()
field_names = [ lyr_defn.GetFieldDefn(i).GetName() for i in range(lyr_defn.GetFieldCount()) ]
assert field_names == [ 'C', 'B', 'A', 'D', 'E_prev', 'E', 'E_next', 'F', 'X']
253 changes: 253 additions & 0 deletions gdal/ogr/ogrsf_frmts/geojson/directedacyclicgraph.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,253 @@
/******************************************************************************
*
* Project: GDAL
* Purpose: Implementation of topologic sorting over a directed acyclic graph
* Author: Even Rouault
*
******************************************************************************
* Copyright (c) 2021, Even Rouault <even dot rouault at spatialys dot com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
****************************************************************************/

#include <algorithm>
#include <list>
#include <map>
#include <set>
#include <stack>
#include <string>
#include <vector>

#include <cassert>

namespace gdal
{

// See https://en.wikipedia.org/wiki/Directed_acyclic_graph
template<class T, class V = std::string> class DirectedAcyclicGraph
{
std::set<T> nodes;
std::map<T, std::set<T>> incomingNodes; // incomingNodes[j][i] means an edge from i to j
std::map<T, std::set<T>> outgoingNodes; // outgoingNodes[i][j] means an edge from i to j
std::map<T, V> names;

public:
DirectedAcyclicGraph() = default;

void addNode(const T& i, const V& s) { nodes.insert(i); names[i] = s; }
void removeNode(const T& i);
const char* addEdge(const T& i, const T& j);
const char* removeEdge(const T& i, const T& j);
bool isTherePathFromTo(const T& i, const T& j) const;
std::vector<T> findStartingNodes() const;
std::vector<T> getTopologicalOrdering();
};

template<class T, class V>
void DirectedAcyclicGraph<T, V>::removeNode(const T& i)
{
nodes.erase(i);
names.erase(i);

{
auto incomingIter = incomingNodes.find(i);
if( incomingIter != incomingNodes.end() )
{
for( const T& j: incomingIter->second )
{
auto outgoingIter = outgoingNodes.find(j);
assert(outgoingIter != outgoingNodes.end());
auto iterJI = outgoingIter->second.find(i);
assert(iterJI != outgoingIter->second.end());
outgoingIter->second.erase(iterJI);
if( outgoingIter->second.empty() )
outgoingNodes.erase(outgoingIter);
}
incomingNodes.erase(incomingIter);
}
}

{
auto outgoingIter = outgoingNodes.find(i);
if( outgoingIter != outgoingNodes.end() )
{
for( const T& j: outgoingIter->second )
{
auto incomingIter = incomingNodes.find(j);
assert(incomingIter != incomingNodes.end());
auto iterJI = incomingIter->second.find(i);
assert(iterJI != incomingIter->second.end());
incomingIter->second.erase(iterJI);
if( incomingIter->second.empty() )
incomingNodes.erase(incomingIter);
}
outgoingNodes.erase(outgoingIter);
}
}
}

template<class T, class V>
const char* DirectedAcyclicGraph<T, V>::addEdge(const T& i, const T& j)
{
if( i == j )
{
return "self cycle";
}
const auto iterI = outgoingNodes.find(i);
if( iterI != outgoingNodes.end() &&
iterI->second.find(j) != iterI->second.end() )
{
return "already inserted edge";
}

if( nodes.find(i) == nodes.end() )
{
return "node i unknown";
}
if( nodes.find(j) == nodes.end() )
{
return "node j unknown";
}

if( isTherePathFromTo(j, i) )
{
return "can't add edge: this would cause a cycle";
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should you drop both instead and sort these two by some other means? this line will keep the attribute order feature-order-dependant still.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good point, but what would you drop if we have edges A->B and B->C, and we now want to insert C->A ?

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

all three as they can't be treated as definitions? but you can't do that on this same pass as then next feature will push them back. you generally need to build cyclic graph and then cut off cycles explicitly.

other strategies that come to mind:

  • push the edges into a set first, order it alphabetically as tuples, push into current dag. will keep alphabetical ordering in case of conflicts.
  • count number of occurences for each edge, sort by that then alphabetically, push into current dag. will keep most common order in case of conflicts.
  • say "it will be this way" and do nothing, maybe put a note. current one still works. :)

Copy link

@Komzpa Komzpa Sep 26, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess first option can also be simplified to edge with from < to beats all conflicting edges with from > to. So in your example C->A is dropped in any order of insertion.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe I will keep suggested improvements (thanks for them) for a next time. Conflicting order is a less common case, and cannot be generated with GDAL.

}

outgoingNodes[i].insert(j);
incomingNodes[j].insert(i);
return nullptr;
}

template<class T, class V>
const char* DirectedAcyclicGraph<T, V>::removeEdge(const T& i, const T& j)
{
auto iterI = outgoingNodes.find(i);
if( iterI == outgoingNodes.end() )
return "no outgoing nodes from i";
auto iterIJ = iterI->second.find(j);
if( iterIJ == iterI->second.end() )
return "no outgoing node from i to j";
iterI->second.erase(iterIJ);
if( iterI->second.empty() )
outgoingNodes.erase(iterI);

auto iterJ = incomingNodes.find(j);
assert( iterJ != incomingNodes.end() );
auto iterJI = iterJ->second.find(i);
assert( iterJI != iterJ->second.end() );
iterJ->second.erase(iterJI);
if( iterJ->second.empty() )
incomingNodes.erase(iterJ);

return nullptr;
}

template<class T, class V>
bool DirectedAcyclicGraph<T, V>::isTherePathFromTo(const T& i, const T& j) const
{
std::set<T> plannedForVisit;
std::stack<T> toVisit;
toVisit.push(i);
plannedForVisit.insert(i);
while(!toVisit.empty())
{
const T n = toVisit.top();
toVisit.pop();
if( n == j )
return true;
const auto iter = outgoingNodes.find(n);
if( iter != outgoingNodes.end() )
{
for( const T& k: iter->second )
{
if( plannedForVisit.find(k) == plannedForVisit.end() )
{
plannedForVisit.insert(k);
toVisit.push(k);
}
}
}
}
return false;
}

template<class T, class V>
std::vector<T> DirectedAcyclicGraph<T,V>::findStartingNodes() const
{
std::vector<T> ret;
for( const auto& i: nodes )
{
if( incomingNodes.find(i) == incomingNodes.end() )
ret.emplace_back(i);
}
return ret;
}

// Kahn's algorithm: https://en.wikipedia.org/wiki/Topological_sorting#Kahn's_algorithm
template<class T, class V>
std::vector<T> DirectedAcyclicGraph<T,V>::getTopologicalOrdering()
{
std::vector<T> ret;
ret.reserve(nodes.size());

const auto cmp = [this](const T& a, const T& b) {
return names.find(a)->second < names.find(b)->second;
};
std::set<T, decltype(cmp)> S(cmp);

const auto sn = findStartingNodes();
for( const auto& i: sn )
S.insert(i);

while( true )
{
auto iterFirst = S.begin();
if( iterFirst == S.end() )
break;
const auto n = *iterFirst;
S.erase(iterFirst);
ret.emplace_back(n);

const auto iter = outgoingNodes.find(n);
if( iter != outgoingNodes.end() )
{
// Need to take a copy as we remove edges during iteration
const auto myOutgoingNodes = iter->second;
for( const T& m: myOutgoingNodes )
{
const char* retRemoveEdge = removeEdge(n, m);
(void)retRemoveEdge;
assert(retRemoveEdge == nullptr);
if( incomingNodes.find(m) == incomingNodes.end() )
{
S.insert(m);
}
}
}
}

// Should not happen for a direct acyclic graph
assert(incomingNodes.empty());
assert(outgoingNodes.empty());

return ret;
}

} // namespace gdal
Loading