From f28f5e8137ffeb479da480dc99e4dedbeec895ab Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Sun, 26 Sep 2021 20:02:01 +0200 Subject: [PATCH] GeoJSON: expose field names in a more consistent orders when some feature lacks some fields (refs qgis/QGIS#45139) --- .../ogr/data/geojson/sparse_fields.geojson | 117 ++++++++ autotest/ogr/ogr_geojson.py | 12 +- .../geojson/directedacyclicgraph.hpp | 253 ++++++++++++++++++ .../ogrsf_frmts/geojson/ogrgeojsonreader.cpp | 201 ++++++++++---- .../ogrsf_frmts/geojson/ogrgeojsonreader.h | 12 +- .../geojson/ogrgeojsonseqdriver.cpp | 19 +- .../ogrsf_frmts/geojson/ogrtopojsonreader.cpp | 60 ++++- 7 files changed, 618 insertions(+), 56 deletions(-) create mode 100644 autotest/ogr/data/geojson/sparse_fields.geojson create mode 100644 gdal/ogr/ogrsf_frmts/geojson/directedacyclicgraph.hpp diff --git a/autotest/ogr/data/geojson/sparse_fields.geojson b/autotest/ogr/data/geojson/sparse_fields.geojson new file mode 100644 index 000000000000..de49ecdd3d6b --- /dev/null +++ b/autotest/ogr/data/geojson/sparse_fields.geojson @@ -0,0 +1,117 @@ +{ + "type": "FeatureCollection", + "features": [ + { + "type": "Feature", + "geometry": null, + "properties": { + "B": "b" + } + }, + { + "type": "Feature", + "geometry": null, + "properties": { + "C": "c" + } + }, + { + "type": "Feature", + "geometry": null, + "properties": { + "C": "c", + "B": "b" + } + }, + { + "type": "Feature", + "geometry": null, + "properties": { + "C": "c", + "A": "a" + } + }, + { + "type": "Feature", + "geometry": null, + "properties": { + "B": "a", + "A": "c" + } + }, + { + "type": "Feature", + "geometry": null, + "properties": { + "X": "isolated_node" + } + }, + { + "type": "Feature", + "geometry": null, + "#comment": "attempt at creating direct cycle", + "properties": { + "A": "c", + "C": "a" + } + }, + { + "type": "Feature", + "geometry": null, + "#comment": "attempt at creating indirect cycle", + "properties": { + "A": "c", + "B": "b", + "C": "a" + } + }, + { + "type": "Feature", + "geometry": null, + "properties": { + "A": "a", + "D": "d" + } + }, + { + "type": "Feature", + "geometry": null, + "properties": { + "A": "a", + "E": "e" + } + }, + { + "type": "Feature", + "geometry": null, + "properties": { + "E": "e", + "E_next": "e_next" + } + }, + { + "type": "Feature", + "geometry": null, + "properties": { + "E_prev": "e_prev", + "E": "e" + } + }, + { + "type": "Feature", + "geometry": null, + "properties": { + "E": "e", + "F": "f" + } + }, + { + "type": "Feature", + "geometry": null, + "properties": { + "D": "d", + "F": "f" + } + } + ] +} diff --git a/autotest/ogr/ogr_geojson.py b/autotest/ogr/ogr_geojson.py index b1680198e65e..826ba5f15bf9 100755 --- a/autotest/ogr/ogr_geojson.py +++ b/autotest/ogr/ogr_geojson.py @@ -1149,7 +1149,7 @@ def test_ogr_geojson_39(): ] }""") lyr = ds.GetLayer(0) feat_defn = lyr.GetLayerDefn() - assert feat_defn.GetFieldDefn(1).GetName() == 'id' and feat_defn.GetFieldDefn(1).GetType() == ogr.OFTInteger + assert feat_defn.GetFieldDefn(0).GetName() == 'id' and feat_defn.GetFieldDefn(0).GetType() == ogr.OFTInteger feat = lyr.GetNextFeature() if feat.GetField('id') != 1: feat.DumpReadable() @@ -3163,3 +3163,13 @@ def test_ogr_geojson_export_geometry_axis_order(): # No CRS g = ogr.CreateGeometryFromWkt('POINT (2 49)') assert json.loads(g.ExportToJson()) == { "type": "Point", "coordinates": [ 2.0, 49.0 ] } + +############################################################################### + +def test_ogr_geojson_sparse_fields(): + + ds = ogr.Open('data/geojson/sparse_fields.geojson') + lyr = ds.GetLayer(0) + lyr_defn = lyr.GetLayerDefn() + field_names = [ lyr_defn.GetFieldDefn(i).GetName() for i in range(lyr_defn.GetFieldCount()) ] + assert field_names == [ 'C', 'B', 'A', 'D', 'E_prev', 'E', 'E_next', 'F', 'X'] diff --git a/gdal/ogr/ogrsf_frmts/geojson/directedacyclicgraph.hpp b/gdal/ogr/ogrsf_frmts/geojson/directedacyclicgraph.hpp new file mode 100644 index 000000000000..530b45f4b065 --- /dev/null +++ b/gdal/ogr/ogrsf_frmts/geojson/directedacyclicgraph.hpp @@ -0,0 +1,253 @@ +/****************************************************************************** + * + * Project: GDAL + * Purpose: Implementation of topologic sorting over a directed acyclic graph + * Author: Even Rouault + * + ****************************************************************************** + * Copyright (c) 2021, Even Rouault + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + ****************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace gdal +{ + +// See https://en.wikipedia.org/wiki/Directed_acyclic_graph +template class DirectedAcyclicGraph +{ + std::set nodes; + std::map> incomingNodes; // incomingNodes[j][i] means an edge from i to j + std::map> outgoingNodes; // outgoingNodes[i][j] means an edge from i to j + std::map names; + +public: + DirectedAcyclicGraph() = default; + + void addNode(const T& i, const V& s) { nodes.insert(i); names[i] = s; } + void removeNode(const T& i); + const char* addEdge(const T& i, const T& j); + const char* removeEdge(const T& i, const T& j); + bool isTherePathFromTo(const T& i, const T& j) const; + std::vector findStartingNodes() const; + std::vector getTopologicalOrdering(); +}; + +template +void DirectedAcyclicGraph::removeNode(const T& i) +{ + nodes.erase(i); + names.erase(i); + + { + auto incomingIter = incomingNodes.find(i); + if( incomingIter != incomingNodes.end() ) + { + for( const T& j: incomingIter->second ) + { + auto outgoingIter = outgoingNodes.find(j); + assert(outgoingIter != outgoingNodes.end()); + auto iterJI = outgoingIter->second.find(i); + assert(iterJI != outgoingIter->second.end()); + outgoingIter->second.erase(iterJI); + if( outgoingIter->second.empty() ) + outgoingNodes.erase(outgoingIter); + } + incomingNodes.erase(incomingIter); + } + } + + { + auto outgoingIter = outgoingNodes.find(i); + if( outgoingIter != outgoingNodes.end() ) + { + for( const T& j: outgoingIter->second ) + { + auto incomingIter = incomingNodes.find(j); + assert(incomingIter != incomingNodes.end()); + auto iterJI = incomingIter->second.find(i); + assert(iterJI != incomingIter->second.end()); + incomingIter->second.erase(iterJI); + if( incomingIter->second.empty() ) + incomingNodes.erase(incomingIter); + } + outgoingNodes.erase(outgoingIter); + } + } +} + +template +const char* DirectedAcyclicGraph::addEdge(const T& i, const T& j) +{ + if( i == j ) + { + return "self cycle"; + } + const auto iterI = outgoingNodes.find(i); + if( iterI != outgoingNodes.end() && + iterI->second.find(j) != iterI->second.end() ) + { + return "already inserted edge"; + } + + if( nodes.find(i) == nodes.end() ) + { + return "node i unknown"; + } + if( nodes.find(j) == nodes.end() ) + { + return "node j unknown"; + } + + if( isTherePathFromTo(j, i) ) + { + return "can't add edge: this would cause a cycle"; + } + + outgoingNodes[i].insert(j); + incomingNodes[j].insert(i); + return nullptr; +} + +template +const char* DirectedAcyclicGraph::removeEdge(const T& i, const T& j) +{ + auto iterI = outgoingNodes.find(i); + if( iterI == outgoingNodes.end() ) + return "no outgoing nodes from i"; + auto iterIJ = iterI->second.find(j); + if( iterIJ == iterI->second.end() ) + return "no outgoing node from i to j"; + iterI->second.erase(iterIJ); + if( iterI->second.empty() ) + outgoingNodes.erase(iterI); + + auto iterJ = incomingNodes.find(j); + assert( iterJ != incomingNodes.end() ); + auto iterJI = iterJ->second.find(i); + assert( iterJI != iterJ->second.end() ); + iterJ->second.erase(iterJI); + if( iterJ->second.empty() ) + incomingNodes.erase(iterJ); + + return nullptr; +} + +template +bool DirectedAcyclicGraph::isTherePathFromTo(const T& i, const T& j) const +{ + std::set plannedForVisit; + std::stack toVisit; + toVisit.push(i); + plannedForVisit.insert(i); + while(!toVisit.empty()) + { + const T n = toVisit.top(); + toVisit.pop(); + if( n == j ) + return true; + const auto iter = outgoingNodes.find(n); + if( iter != outgoingNodes.end() ) + { + for( const T& k: iter->second ) + { + if( plannedForVisit.find(k) == plannedForVisit.end() ) + { + plannedForVisit.insert(k); + toVisit.push(k); + } + } + } + } + return false; +} + +template +std::vector DirectedAcyclicGraph::findStartingNodes() const +{ + std::vector ret; + for( const auto& i: nodes ) + { + if( incomingNodes.find(i) == incomingNodes.end() ) + ret.emplace_back(i); + } + return ret; +} + +// Kahn's algorithm: https://en.wikipedia.org/wiki/Topological_sorting#Kahn's_algorithm +template +std::vector DirectedAcyclicGraph::getTopologicalOrdering() +{ + std::vector ret; + ret.reserve(nodes.size()); + + const auto cmp = [this](const T& a, const T& b) { + return names.find(a)->second < names.find(b)->second; + }; + std::set S(cmp); + + const auto sn = findStartingNodes(); + for( const auto& i: sn ) + S.insert(i); + + while( true ) + { + auto iterFirst = S.begin(); + if( iterFirst == S.end() ) + break; + const auto n = *iterFirst; + S.erase(iterFirst); + ret.emplace_back(n); + + const auto iter = outgoingNodes.find(n); + if( iter != outgoingNodes.end() ) + { + // Need to take a copy as we remove edges during iteration + const auto myOutgoingNodes = iter->second; + for( const T& m: myOutgoingNodes ) + { + const char* retRemoveEdge = removeEdge(n, m); + (void)retRemoveEdge; + assert(retRemoveEdge == nullptr); + if( incomingNodes.find(m) == incomingNodes.end() ) + { + S.insert(m); + } + } + } + } + + // Should not happen for a direct acyclic graph + assert(incomingNodes.empty()); + assert(outgoingNodes.empty()); + + return ret; +} + +} // namespace gdal diff --git a/gdal/ogr/ogrsf_frmts/geojson/ogrgeojsonreader.cpp b/gdal/ogr/ogrsf_frmts/geojson/ogrgeojsonreader.cpp index 3d685cbd307d..b20255f3a5df 100644 --- a/gdal/ogr/ogrsf_frmts/geojson/ogrgeojsonreader.cpp +++ b/gdal/ogr/ogrsf_frmts/geojson/ogrgeojsonreader.cpp @@ -104,6 +104,10 @@ class OGRGeoJSONReaderStreamingParser: public CPLJSonStreamingParser bool m_bStartFeature = false; bool m_bEndFeature = false; + std::map m_oMapFieldNameToIdx{}; + std::vector> m_apoFieldDefn{}; + gdal::DirectedAcyclicGraph m_dag{}; + void AppendObject(json_object* poNewObj); void AnalyzeFeature(); void TooComplex(); @@ -132,6 +136,8 @@ class OGRGeoJSONReaderStreamingParser: public CPLJSonStreamingParser virtual void Exception(const char* /*pszMessage*/) override; + void FinalizeLayerDefn(); + OGRFeature* GetNextFeature(); json_object* StealRootObject(); inline bool IsTypeKnown() const { return m_bIsTypeKnown; } @@ -399,7 +405,10 @@ void OGRGeoJSONReaderStreamingParser::AppendObject(json_object* poNewObj) void OGRGeoJSONReaderStreamingParser::AnalyzeFeature() { - if( !m_oReader.GenerateFeatureDefn( m_poLayer, m_poCurObj ) ) + if( !m_oReader.GenerateFeatureDefn( m_oMapFieldNameToIdx, + m_apoFieldDefn, + m_dag, + m_poLayer, m_poCurObj ) ) { } m_poLayer->IncFeatureCount(); @@ -526,6 +535,24 @@ void OGRGeoJSONReaderStreamingParser::EndObject() } } +/************************************************************************/ +/* FinalizeLayerDefn() */ +/************************************************************************/ + +void OGRGeoJSONReaderStreamingParser::FinalizeLayerDefn() +{ + OGRFeatureDefn* poDefn = m_poLayer->GetLayerDefn(); + const auto sortedFields = m_dag.getTopologicalOrdering(); + CPLAssert( sortedFields.size() == m_apoFieldDefn.size() ); + for( int idx: sortedFields ) + { + poDefn->AddFieldDefn(m_apoFieldDefn[idx].get()); + } + m_dag = gdal::DirectedAcyclicGraph(); + m_oMapFieldNameToIdx.clear(); + m_apoFieldDefn.clear(); +} + /************************************************************************/ /* StartObjectMember() */ /************************************************************************/ @@ -958,6 +985,8 @@ bool OGRGeoJSONReader::FirstPassReadLayer( OGRGeoJSONDataSource* poDS, return false; } + oParser.FinalizeLayerDefn(); + CPLString osFIDColumn; FinalizeLayerDefn(poLayer, osFIDColumn); if( !osFIDColumn.empty() ) @@ -1581,10 +1610,15 @@ bool OGRGeoJSONReader::GenerateLayerDefn( OGRGeoJSONLayer* poLayer, /* -------------------------------------------------------------------- */ bool bSuccess = true; + std::map oMapFieldNameToIdx; + std::vector> apoFieldDefn; + gdal::DirectedAcyclicGraph dag; + GeoJSONObject::Type objType = OGRGeoJSONGetType( poGJObject ); if( GeoJSONObject::eFeature == objType ) { - bSuccess = GenerateFeatureDefn( poLayer, poGJObject ); + bSuccess = GenerateFeatureDefn( oMapFieldNameToIdx, apoFieldDefn, dag, + poLayer, poGJObject ); } else if( GeoJSONObject::eFeatureCollection == objType ) { @@ -1598,7 +1632,8 @@ bool OGRGeoJSONReader::GenerateLayerDefn( OGRGeoJSONLayer* poLayer, { json_object* poObjFeature = json_object_array_get_idx( poObjFeatures, i ); - if( !GenerateFeatureDefn( poLayer, poObjFeature ) ) + if( !GenerateFeatureDefn( oMapFieldNameToIdx, apoFieldDefn, dag, + poLayer, poObjFeature ) ) { CPLDebug( "GeoJSON", "Create feature schema failure." ); bSuccess = false; @@ -1614,6 +1649,14 @@ bool OGRGeoJSONReader::GenerateLayerDefn( OGRGeoJSONLayer* poLayer, } } + OGRFeatureDefn* poDefn = poLayer->GetLayerDefn(); + const auto sortedFields = dag.getTopologicalOrdering(); + CPLAssert( sortedFields.size() == apoFieldDefn.size() ); + for( int idx: sortedFields ) + { + poDefn->AddFieldDefn(apoFieldDefn[idx].get()); + } + CPLString osFIDColumn; FinalizeLayerDefn(poLayer, osFIDColumn); if( !osFIDColumn.empty() ) @@ -1656,7 +1699,9 @@ void OGRGeoJSONBaseReader::FinalizeLayerDefn(OGRLayer* poLayer, /************************************************************************/ void OGRGeoJSONReaderAddOrUpdateField( - OGRFeatureDefn* poDefn, + std::vector& retIndices, + std::map& oMapFieldNameToIdx, + std::vector>& apoFieldDefn, const char* pszKey, json_object* poVal, bool bFlattenNestedAttributes, @@ -1681,7 +1726,10 @@ void OGRGeoJSONReaderAddOrUpdateField( if( it.val != nullptr && json_object_get_type(it.val) == json_type_object ) { - OGRGeoJSONReaderAddOrUpdateField(poDefn, osAttrName, it.val, + OGRGeoJSONReaderAddOrUpdateField(retIndices, + oMapFieldNameToIdx, + apoFieldDefn, + osAttrName, it.val, true, chNestedAttributeSeparator, bArrayAsString, @@ -1690,7 +1738,10 @@ void OGRGeoJSONReaderAddOrUpdateField( } else { - OGRGeoJSONReaderAddOrUpdateField(poDefn, osAttrName, it.val, + OGRGeoJSONReaderAddOrUpdateField(retIndices, + oMapFieldNameToIdx, + apoFieldDefn, + osAttrName, it.val, false, 0, bArrayAsString, bDateAsString, @@ -1700,28 +1751,33 @@ void OGRGeoJSONReaderAddOrUpdateField( return; } - int nIndex = poDefn->GetFieldIndexCaseSensitive(pszKey); - if( nIndex < 0 ) + auto oMapFieldNameToIdxIter = oMapFieldNameToIdx.find(pszKey); + if( oMapFieldNameToIdxIter == oMapFieldNameToIdx.end() ) { OGRFieldSubType eSubType; const OGRFieldType eType = GeoJSONPropertyToFieldType( poVal, eSubType, bArrayAsString ); - OGRFieldDefn fldDefn( pszKey, eType ); - fldDefn.SetSubType(eSubType); + auto poFieldDefn = cpl::make_unique( pszKey, eType ); + poFieldDefn->SetSubType(eSubType); if( eSubType == OFSTBoolean ) - fldDefn.SetWidth(1); - if( fldDefn.GetType() == OFTString && !bDateAsString ) + poFieldDefn->SetWidth(1); + if( poFieldDefn->GetType() == OFTString && !bDateAsString ) { - fldDefn.SetType(GeoJSONStringPropertyToFieldType( poVal )); + poFieldDefn->SetType(GeoJSONStringPropertyToFieldType( poVal )); } - poDefn->AddFieldDefn( &fldDefn ); + apoFieldDefn.emplace_back(std::move(poFieldDefn)); + const int nIndex = static_cast(apoFieldDefn.size()) - 1; + retIndices.emplace_back(nIndex); + oMapFieldNameToIdx[pszKey] = nIndex; if( poVal == nullptr ) - aoSetUndeterminedTypeFields.insert( poDefn->GetFieldCount() - 1 ); + aoSetUndeterminedTypeFields.insert(nIndex); } else if( poVal ) { + const int nIndex = oMapFieldNameToIdxIter->second; + retIndices.emplace_back(nIndex); // If there is a null value: do not update field definition. - OGRFieldDefn* poFDefn = poDefn->GetFieldDefn(nIndex); + OGRFieldDefn* poFDefn = apoFieldDefn[nIndex].get(); const OGRFieldType eType = poFDefn->GetType(); if( aoSetUndeterminedTypeFields.find(nIndex) != aoSetUndeterminedTypeFields.end() ) @@ -1929,17 +1985,22 @@ void OGRGeoJSONReaderAddOrUpdateField( poFDefn->SetWidth( poFDefn->GetSubType() == OFSTBoolean ? 1 : 0 ); } + else + { + const int nIndex = oMapFieldNameToIdxIter->second; + retIndices.emplace_back(nIndex); + } } /************************************************************************/ /* GenerateFeatureDefn() */ /************************************************************************/ -bool OGRGeoJSONBaseReader::GenerateFeatureDefn( OGRLayer* poLayer, +bool OGRGeoJSONBaseReader::GenerateFeatureDefn( std::map& oMapFieldNameToIdx, + std::vector>& apoFieldDefn, + gdal::DirectedAcyclicGraph& dag, + OGRLayer* poLayer, json_object* poObj ) { - OGRFeatureDefn* poDefn = poLayer->GetLayerDefn(); - CPLAssert( nullptr != poDefn ); - /* -------------------------------------------------------------------- */ /* Read collection of properties. */ /* -------------------------------------------------------------------- */ @@ -1949,11 +2010,14 @@ bool OGRGeoJSONBaseReader::GenerateFeatureDefn( OGRLayer* poLayer, static_cast( poObjPropsEntry ? poObjPropsEntry->v : nullptr)); + std::vector anCurFieldIndices; + int nPrevFieldIdx = -1; + json_object* poObjId = OGRGeoJSONFindMemberByName( poObj, "id" ); if( poObjId ) { - const int nIdx = poDefn->GetFieldIndexCaseSensitive( "id" ); - if( nIdx < 0 ) + auto iterIdxId = oMapFieldNameToIdx.find("id"); + if( iterIdxId == oMapFieldNameToIdx.end() ) { if( json_object_get_type(poObjId) == json_type_int ) { @@ -1993,25 +2057,34 @@ bool OGRGeoJSONBaseReader::GenerateFeatureDefn( OGRLayer* poLayer, else eType = OFTInteger64; } - OGRFieldDefn fldDefn( "id", eType ); - poDefn->AddFieldDefn(&fldDefn); + apoFieldDefn.emplace_back( + cpl::make_unique("id", eType)); + const int nIdx = static_cast(apoFieldDefn.size()) - 1; + oMapFieldNameToIdx["id"] = nIdx; + nPrevFieldIdx = nIdx; + dag.addNode(nIdx, "id"); bFeatureLevelIdAsAttribute_ = true; } } } - else if( bFeatureLevelIdAsAttribute_ && - json_object_get_type(poObjId) == json_type_int ) + else { - if( poDefn->GetFieldDefn(nIdx)->GetType() == OFTInteger ) + const int nIdx = iterIdxId->second; + nPrevFieldIdx = nIdx; + if( bFeatureLevelIdAsAttribute_ && + json_object_get_type(poObjId) == json_type_int ) + { + if( apoFieldDefn[nIdx]->GetType() == OFTInteger ) + { + if( !CPL_INT64_FITS_ON_INT32( json_object_get_int64(poObjId) ) ) + apoFieldDefn[nIdx]->SetType(OFTInteger64); + } + } + else if( bFeatureLevelIdAsAttribute_ ) { - if( !CPL_INT64_FITS_ON_INT32( json_object_get_int64(poObjId) ) ) - poDefn->GetFieldDefn(nIdx)->SetType(OFTInteger64); + apoFieldDefn[nIdx]->SetType(OFTString); } } - else if( bFeatureLevelIdAsAttribute_ ) - { - poDefn->GetFieldDefn(nIdx)->SetType(OFTString); - } } @@ -2075,8 +2148,8 @@ bool OGRGeoJSONBaseReader::GenerateFeatureDefn( OGRLayer* poLayer, it.entry = nullptr; json_object_object_foreachC( poObjProps, it ) { - int nFldIndex = poDefn->GetFieldIndexCaseSensitive( it.key ); - if( -1 == nFldIndex && !bIsGeocouchSpatiallistFormat ) + if( !bIsGeocouchSpatiallistFormat && + oMapFieldNameToIdx.find(it.key) == oMapFieldNameToIdx.end() ) { // Detect the special kind of GeoJSON output by a spatiallist of // GeoCouch such as: @@ -2108,19 +2181,43 @@ bool OGRGeoJSONBaseReader::GenerateFeatureDefn( OGRLayer* poLayer, "TRUE")); if( bFlattenGeocouchSpatiallistFormat ) { - poDefn->DeleteFieldDefn(poDefn->GetFieldIndexCaseSensitive("type")); + auto typeIter = oMapFieldNameToIdx.find("type"); + if( typeIter != oMapFieldNameToIdx.end() ) + { + const int nIdx = typeIter->second; + apoFieldDefn.erase(apoFieldDefn.begin() + nIdx); + oMapFieldNameToIdx.erase(typeIter); + dag.removeNode(nIdx); + } + bIsGeocouchSpatiallistFormat = true; - return GenerateFeatureDefn(poLayer, poObj); + return GenerateFeatureDefn(oMapFieldNameToIdx, + apoFieldDefn, + dag, + poLayer, poObj); } } } - OGRGeoJSONReaderAddOrUpdateField(poDefn, it.key, it.val, + anCurFieldIndices.clear(); + OGRGeoJSONReaderAddOrUpdateField(anCurFieldIndices, + oMapFieldNameToIdx, + apoFieldDefn, + it.key, it.val, bFlattenNestedAttributes_, chNestedAttributeSeparator_, bArrayAsString_, bDateAsString_, aoSetUndeterminedTypeFields_); + for( int idx: anCurFieldIndices ) + { + dag.addNode(idx, apoFieldDefn[idx]->GetNameRef()); + if( nPrevFieldIdx != -1 ) + { + dag.addEdge(nPrevFieldIdx, idx); + } + nPrevFieldIdx = idx; + } } bSuccess = true; // SUCCESS @@ -2147,15 +2244,27 @@ bool OGRGeoJSONBaseReader::GenerateFeatureDefn( OGRLayer* poLayer, strcmp(it.key, "bbox") != 0 && strcmp(it.key, "center") != 0 ) { - int nFldIndex = poDefn->GetFieldIndexCaseSensitive( it.key ); - if( -1 == nFldIndex ) + if( oMapFieldNameToIdx.find(it.key) == oMapFieldNameToIdx.end() ) { - OGRGeoJSONReaderAddOrUpdateField(poDefn, it.key, it.val, - bFlattenNestedAttributes_, - chNestedAttributeSeparator_, - bArrayAsString_, - bDateAsString_, - aoSetUndeterminedTypeFields_); + anCurFieldIndices.clear(); + OGRGeoJSONReaderAddOrUpdateField(anCurFieldIndices, + oMapFieldNameToIdx, + apoFieldDefn, + it.key, it.val, + bFlattenNestedAttributes_, + chNestedAttributeSeparator_, + bArrayAsString_, + bDateAsString_, + aoSetUndeterminedTypeFields_); + for( int idx: anCurFieldIndices ) + { + dag.addNode(idx, apoFieldDefn[idx]->GetNameRef()); + if( nPrevFieldIdx != -1 ) + { + dag.addEdge(nPrevFieldIdx, idx); + } + nPrevFieldIdx = idx; + } } } } diff --git a/gdal/ogr/ogrsf_frmts/geojson/ogrgeojsonreader.h b/gdal/ogr/ogrsf_frmts/geojson/ogrgeojsonreader.h index d75f31169933..e3db4f7cac4b 100644 --- a/gdal/ogr/ogrsf_frmts/geojson/ogrgeojsonreader.h +++ b/gdal/ogr/ogrsf_frmts/geojson/ogrgeojsonreader.h @@ -36,9 +36,12 @@ #include "ogrsf_frmts.h" #include "ogrgeojsonutils.h" +#include "directedacyclicgraph.hpp" #include +#include #include +#include /************************************************************************/ /* FORWARD DECLARATIONS */ @@ -101,7 +104,10 @@ class OGRGeoJSONBaseReader void SetArrayAsString( bool bArrayAsString ); void SetDateAsString( bool bDateAsString ); - bool GenerateFeatureDefn( OGRLayer* poLayer, json_object* poObj ); + bool GenerateFeatureDefn( std::map& oMapFieldNameToIdx, + std::vector>& apoFieldDefn, + gdal::DirectedAcyclicGraph& dag, + OGRLayer* poLayer, json_object* poObj ); void FinalizeLayerDefn( OGRLayer* poLayer, CPLString& osFIDColumn ); OGRGeometry* ReadGeometry( json_object* poObj, OGRSpatialReference* poLayerSRS ); @@ -213,7 +219,9 @@ void OGRGeoJSONReaderSetField( OGRLayer* poLayer, bool bFlattenNestedAttributes, char chNestedAttributeSeparator ); void OGRGeoJSONReaderAddOrUpdateField( - OGRFeatureDefn* poDefn, + std::vector& retIndices, + std::map& oMapFieldNameToIdx, + std::vector>& apoFieldDefn, const char* pszKey, json_object* poVal, bool bFlattenNestedAttributes, diff --git a/gdal/ogr/ogrsf_frmts/geojson/ogrgeojsonseqdriver.cpp b/gdal/ogr/ogrsf_frmts/geojson/ogrgeojsonseqdriver.cpp index ddd5242cf893..930dc5f3bab7 100644 --- a/gdal/ogr/ogrsf_frmts/geojson/ogrgeojsonseqdriver.cpp +++ b/gdal/ogr/ogrsf_frmts/geojson/ogrgeojsonseqdriver.cpp @@ -135,7 +135,7 @@ class OGRGeoJSONSeqWriteLayer final: public OGRLayer int TestCapability( const char* pszCap ) override; private: - OGRGeoJSONSeqDataSource* m_poDS = nullptr; + OGRGeoJSONSeqDataSource* m_poDS = nullptr; OGRFeatureDefn* m_poFeatureDefn = nullptr; OGRCoordinateTransformation* m_poCT = nullptr; @@ -291,6 +291,10 @@ bool OGRGeoJSONSeqLayer::Init(bool bLooseIdentification) ResetReading(); + std::map oMapFieldNameToIdx; + std::vector> apoFieldDefn; + gdal::DirectedAcyclicGraph dag; + while( true ) { auto poObject = GetNextObject(bLooseIdentification); @@ -298,12 +302,23 @@ bool OGRGeoJSONSeqLayer::Init(bool bLooseIdentification) break; if( OGRGeoJSONGetType(poObject) == GeoJSONObject::eFeature ) { - m_oReader.GenerateFeatureDefn(this, poObject); + m_oReader.GenerateFeatureDefn(oMapFieldNameToIdx, + apoFieldDefn, + dag, + this, poObject); } json_object_put(poObject); m_nTotalFeatures ++; } + OGRFeatureDefn* poDefn = GetLayerDefn(); + const auto sortedFields = dag.getTopologicalOrdering(); + CPLAssert( sortedFields.size() == apoFieldDefn.size() ); + for( int idx: sortedFields ) + { + poDefn->AddFieldDefn(apoFieldDefn[idx].get()); + } + ResetReading(); m_nFileSize = 0; diff --git a/gdal/ogr/ogrsf_frmts/geojson/ogrtopojsonreader.cpp b/gdal/ogr/ogrsf_frmts/geojson/ogrtopojsonreader.cpp index 88206b91be57..5bb24456d0ac 100644 --- a/gdal/ogr/ogrsf_frmts/geojson/ogrtopojsonreader.cpp +++ b/gdal/ogr/ogrsf_frmts/geojson/ogrtopojsonreader.cpp @@ -426,7 +426,10 @@ static void ParseObject( const char* pszId, /* EstablishLayerDefn() */ /************************************************************************/ -static void EstablishLayerDefn( OGRFeatureDefn* poDefn, +static void EstablishLayerDefn( std::vector& anCurFieldIndices, + std::map& oMapFieldNameToIdx, + std::vector>& apoFieldDefn, + gdal::DirectedAcyclicGraph& dag, json_object* poObj, std::set& aoSetUndeterminedTypeFields ) { @@ -439,11 +442,25 @@ static void EstablishLayerDefn( OGRFeatureDefn* poDefn, it.val = nullptr; it.entry = nullptr; + int nPrevFieldIdx = -1; json_object_object_foreachC( poObjProps, it ) { - OGRGeoJSONReaderAddOrUpdateField(poDefn, it.key, it.val, + anCurFieldIndices.clear(); + OGRGeoJSONReaderAddOrUpdateField(anCurFieldIndices, + oMapFieldNameToIdx, + apoFieldDefn, + it.key, it.val, false, 0, false, false, aoSetUndeterminedTypeFields); + for( int idx: anCurFieldIndices ) + { + dag.addNode(idx, apoFieldDefn[idx]->GetNameRef()); + if( nPrevFieldIdx != -1 ) + { + dag.addEdge(nPrevFieldIdx, idx); + } + nPrevFieldIdx = idx; + } } } } @@ -500,6 +517,11 @@ static bool ParseObjectMain( const char* pszId, json_object* poObj, const auto nGeometries = json_object_array_length(poGeometries); // First pass to establish schema. + std::vector anCurFieldIndices; + std::map oMapFieldNameToIdx; + std::vector> apoFieldDefn; + gdal::DirectedAcyclicGraph dag; + for( auto i = decltype(nGeometries){0}; i < nGeometries; i++ ) { @@ -508,11 +530,22 @@ static bool ParseObjectMain( const char* pszId, json_object* poObj, if( poGeom != nullptr && json_type_object == json_object_get_type( poGeom ) ) { - EstablishLayerDefn(poDefn, poGeom, + EstablishLayerDefn(anCurFieldIndices, + oMapFieldNameToIdx, + apoFieldDefn, + dag, + poGeom, aoSetUndeterminedTypeFields); } } + const auto sortedFields = dag.getTopologicalOrdering(); + CPLAssert( sortedFields.size() == apoFieldDefn.size() ); + for( int idx: sortedFields ) + { + poDefn->AddFieldDefn(apoFieldDefn[idx].get()); + } + // Second pass to build objects. for( auto i = decltype(nGeometries){0}; i < nGeometries; i++ ) @@ -550,9 +583,26 @@ static bool ParseObjectMain( const char* pszId, json_object* poObj, GetLayerDefn()->AddFieldDefn( &fldDefn ); } } - OGRFeatureDefn* poDefn = (*ppoMainLayer)->GetLayerDefn(); - EstablishLayerDefn(poDefn, poObj, + std::vector anCurFieldIndices; + std::map oMapFieldNameToIdx; + std::vector> apoFieldDefn; + gdal::DirectedAcyclicGraph dag; + + EstablishLayerDefn(anCurFieldIndices, + oMapFieldNameToIdx, + apoFieldDefn, + dag, + poObj, aoSetUndeterminedTypeFields); + OGRFeatureDefn* poDefn = (*ppoMainLayer)->GetLayerDefn(); + + const auto sortedFields = dag.getTopologicalOrdering(); + CPLAssert( sortedFields.size() == apoFieldDefn.size() ); + for( int idx: sortedFields ) + { + poDefn->AddFieldDefn(apoFieldDefn[idx].get()); + } + bNeedSecondPass = true; } else