-
Notifications
You must be signed in to change notification settings - Fork 2
/
gSSURGO_ValidateData.py
796 lines (628 loc) · 31.5 KB
/
gSSURGO_ValidateData.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
# gSSURGO_ValidateData.py
#
# Steve Peaslee, National Soil Survey Center
# 2014-09-27
# Adapted from gSSURGO_ValuTable.py
#
# Checks for some basic data population problems at the mapunit, component and horizon levels
# 2017-02-03 Ran into MEMORY limit with CONUS. Split the last function up to get
# memory usage back down.
"""
SELECT l.areasymbol, m.musym, m.mukey, m.muname, sum(c.comppct_r) as sum_pct
FROM mapunit m
INNER JOIN legend l ON m.lkey = l.lkey
INNER JOIN component c ON m.mukey = c.mukey
WHERE m.MUKEY IN (xxMUKEYSxx)
GROUP BY l.areasymbol, m.mukey, m.musym, m.muname
ORDER BY l.areasymbol, m.musym
"""
#
## ===================================================================================
class MyError(Exception):
    """Custom exception used to carry user-facing error messages to PrintMsg."""
## ===================================================================================
def errorMsg():
    # Format the current exception's traceback and report it through PrintMsg
    # with error severity.  Intended to be called from inside an except block.
    try:
        # BUG FIX: sys.exc_type / sys.exc_value are deprecated Python 2 module
        # globals (removed in Python 3 and unreliable); take the exception type
        # and value from sys.exc_info() instead.
        excType, excValue, tb = sys.exc_info()
        tbinfo = traceback.format_tb(tb)[0]
        theMsg = tbinfo + " \n" + str(excType) + ": " + str(excValue) + " \n"
        PrintMsg(theMsg, 2)
    except:
        # Last-resort guard: never let the error reporter itself raise
        PrintMsg("Unhandled error in errorMsg method", 2)
## ===================================================================================
def PrintMsg(msg, severity=0):
    """Relay a message to the geoprocessor, one GP message per line.

    severity: 0 = informational message, 1 = warning, 2 = error.
    Splitting on newlines makes multi-line text display cleanly when the
    script runs as a geoprocessing tool.
    """
    try:
        for line in msg.split('\n'):
            if severity == 0:
                arcpy.AddMessage(line)
            elif severity == 1:
                arcpy.AddWarning(line)
            elif severity == 2:
                # blank message first so the error stands out in the tool dialog
                arcpy.AddMessage(" ")
                arcpy.AddError(line)
    except:
        # Never let message reporting break the caller
        pass
## ===================================================================================
def Number_Format(num, places=0, bCommas=True):
    """Format a number per the current locale with the given decimal places.

    bCommas adds locale thousands separators when True.
    Returns the formatted string, or False on error.
    """
    try:
        # BUG FIX: locale.format() with a tuple argument is deprecated (and the
        # function was removed in Python 3.12); locale.format_string() is the
        # supported equivalent in both Python 2.7 and 3.x.  The old no-op
        # locale.getlocale() call was dropped.
        return locale.format_string("%.*f", (places, num), grouping=bCommas)
    except:
        PrintMsg("Unhandled exception in Number_Format function (" + str(num) + ")", 2)
        return False
## ===================================================================================
def GetLastDate(inputDB):
    """Return the most recent survey date ('YYYYMMDD') from SACATALOG.SAVEREST.

    Used to populate metadata.  Returns an empty string when the table is
    empty or an error occurs.
    """
    try:
        tbl = os.path.join(inputDB, "SACATALOG")
        lastDate = ""  # BUG FIX: was undefined (NameError) when SACATALOG had no rows
        # Sort descending so the first record holds the most recent save date.
        # BUG FIX: postfix was "ORDER_BY ..." (underscore), which is not valid SQL.
        sqlClause = [None, "ORDER BY SAVEREST DESC"]
        with arcpy.da.SearchCursor(tbl, ['SAVEREST'], sql_clause=sqlClause) as cur:
            for rec in cur:
                # SAVEREST is a datetime; only the first (newest) record is needed
                lastDate = rec[0].strftime('%Y%m%d')
                break
        return lastDate
    except MyError as e:
        PrintMsg(str(e) + " \n", 2)
        return ""
    except:
        errorMsg()
        return ""
## ===================================================================================
def GetSumPct(inputDB):
    """Return a dictionary of summed component percentages per map unit.

    Key:   mukey
    Value: tuple of summed comppct_r --
        [0] all components
        [1] major-earthy components
        [2] major components
        [3] earthy components

    Only components with a populated comppct_r are counted.  Returns an empty
    dict on error.  NOTE: WSS and SDV use all components with data for AWS.
    """
    try:
        pctSQL = "comppct_r is not null"
        pctFlds = ["mukey", "compkind", "majcompflag", "comppct_r"]
        dPct = dict()
        with arcpy.da.SearchCursor(os.path.join(inputDB, "component"), pctFlds, where_clause=pctSQL) as pctCur:
            for rec in pctCur:
                mukey, compkind, flag, comppct = rec
                majPct = 0       # major component percent
                mjrEarthPct = 0  # major-earthy component percent
                earthPct = 0     # earthy component percent
                if flag == 'Yes':
                    majPct = comppct
                    # "earthy" = any compkind other than a miscellaneous area
                    if not compkind in ["Miscellaneous area", ""]:
                        mjrEarthPct = comppct
                        earthPct = comppct
                elif not compkind in ["Miscellaneous area", ""]:
                    earthPct = comppct
                try:
                    # accumulate onto the running sums for this map unit
                    pctAll, pctME, pctMjr, pctE = dPct[mukey]
                    dPct[mukey] = (pctAll + comppct, pctME + mjrEarthPct, pctMjr + majPct, pctE + earthPct)
                except KeyError:
                    # first component seen for this map unit
                    dPct[mukey] = (comppct, mjrEarthPct, majPct, earthPct)
        return dPct
    except MyError as e:
        # BUG FIX: 'except MyError, e' is Python-2-only syntax; 'as' works in 2.6+/3.x
        PrintMsg(str(e) + " \n", 2)
        return dict()
    except:
        errorMsg()
        return dict()
## ===================================================================================
def CreateQueryTables(inputDB, outputDB, maxD, dPct):
    """Assemble a mapunit-component-horizon table (QueryTable_HZ) in outputDB.

    ArcGIS cannot perform a proper outer join, so the three tables are read
    into dictionaries and merged manually: one output record per horizon, or
    one per component/mapunit padded with nulls when lower-level data is
    missing.

    Returns a tuple of eight items:
        outputTable - full path to the new QueryTable_HZ table
        dCr         - {cokey: [resdept_r, ...]} component restriction depths
        muList      - sorted list of all mukeys
        muNoCo      - mukeys lacking component data (excluding NOTCOM/NOTPUB)
        dNoCo       - {mukey: [musym, muname]} for muNoCo members
        muNotCom    - NOTCOM/NOTPUB mukeys lacking component data
        coNoHz      - cokeys lacking horizon data
        dNoHz       - {cokey: [mukey, cokey, compname, compkind, majcompflag, comppct]}

    On error a tuple of eight Nones is returned.
    NOTE(review): maxD and dPct are currently unused here; kept for call
    compatibility -- confirm before removing.
    """
    try:
        env.workspace = inputDB
        queryTemp = "Tmp"  # in-memory query table used only as a schema template

        # ---- MAPUNIT: load musym/muname keyed by mukey ----
        PrintMsg(" \nReading MAPUNIT table...", 0)
        fldMu = [["mukey", "mukey"], ["musym", "musym"], ["muname", "muname"]]
        fldMu2 = [fld[0] for fld in fldMu]
        dMu = dict()
        muList = list()
        sqlClause = (None, "ORDER BY mukey")
        muTbl = os.path.join(inputDB, "mapunit")
        with arcpy.da.SearchCursor(muTbl, fldMu2, sql_clause=sqlClause) as mcur:
            for mrec in mcur:
                rec = list(mrec)
                mukey = rec.pop(0)
                dMu[mukey] = rec  # [musym, muname]
                muList.append(mukey)
        muList.sort()
        del muTbl

        # ---- COMPONENT: group component records by mukey ----
        PrintMsg(" \nReading COMPONENT table...", 0)
        fldCo = [["mukey", "mukey"], ["cokey", "cokey"], ["comppct_r", "comppct_r"], ["majcompflag", "majcompflag"],
                 ["compname", "compname"], ["compkind", "compkind"], ["taxorder", "taxorder"], ["taxsubgrp", "taxsubgrp"],
                 ["localphase", "localphase"], ["otherph", "otherph"], ["hydricrating", "hydricrating"], ["drainagecl", "drainagecl"]]
        fldCo2 = [fld[0] for fld in fldCo]
        dCo = dict()
        whereClause = "comppct_r is not NULL"
        sqlClause = (None, "ORDER BY cokey, comppct_r DESC")
        coTbl = os.path.join(inputDB, "component")
        with arcpy.da.SearchCursor(coTbl, fldCo2, where_clause=whereClause, sql_clause=sqlClause) as ccur:
            for crec in ccur:
                rec = list(crec)
                mukey = rec.pop(0)  # mukey becomes the dictionary key
                try:
                    dCo[mukey].append(rec)
                except KeyError:
                    dCo[mukey] = [rec]
        del coTbl

        # ---- CHORIZON: group horizon records by cokey ----
        PrintMsg(" \nReading HORIZON table...", 0)
        fldHz = [["cokey", "cokey"], ["chkey", "chkey"], ["hzname", "hzname"], ["desgnmaster", "desgnmaster"],
                 ["hzdept_r", "hzdept_r"], ["hzdepb_r", "hzdepb_r"], ["sandtotal_r", "sandtotal_r"],
                 ["silttotal_r", "silttotal_r"], ["claytotal_r", "claytotal_r"], ["om_r", "om_r"],
                 ["dbthirdbar_r", "dbthirdbar_r"], ["ec_r", "ec_r"], ["ph1to1h2o_r", "ph1to1h2o_r"],
                 ["awc_r", "awc_r"]]
        fldHz2 = [fld[0] for fld in fldHz]
        dHz = dict()
        whereClause = "hzdept_r is not NULL and hzdepb_r is not NULL"
        sqlClause = (None, "ORDER BY chkey, hzdept_r ASC")
        hzTbl = os.path.join(inputDB, "chorizon")
        with arcpy.da.SearchCursor(hzTbl, fldHz2, where_clause=whereClause, sql_clause=sqlClause) as hcur:
            for hrec in hcur:
                rec = list(hrec)
                cokey = rec.pop(0)
                try:
                    dHz[cokey].append(rec)
                except KeyError:
                    dHz[cokey] = [rec]
        del hzTbl

        # ---- CORESTRICTIONS: restriction depths per component, compared
        #      later against CHORIZON depths in RunReport ----
        # (Horizon-texture validation was removed from this script.)
        crTbl = os.path.join(inputDB, "corestrictions")
        PrintMsg(" \nWriting component restrictions to " + os.path.join(outputDB, "QueryTable_CR"), 0)
        fldCr2 = ["cokey", "resdept_r"]
        dCr = dict()
        sqlClause = (None, "ORDER BY cokey, resdept_r ASC")
        whereClause = "resdept_r is not NULL"
        crCnt = int(arcpy.GetCount_management(crTbl).getOutput(0))
        arcpy.SetProgressor("step", "Saving component restriction information...", 0, crCnt, 1)
        with arcpy.da.SearchCursor(crTbl, fldCr2, where_clause=whereClause, sql_clause=sqlClause) as crcur:
            for crrec in crcur:
                cokey = crrec[0]
                arcpy.SetProgressorPosition()
                try:
                    dCr[cokey].append(crrec[1])
                except KeyError:
                    dCr[cokey] = [crrec[1]]
        del crTbl

        # ---- Build the empty QueryTable_HZ from a query-table schema ----
        PrintMsg(" \nCreating QueryTable_HZ in " + outputDB, 0)
        # Drop the leading key field from the component and horizon lists;
        # those keys were only needed to build the dictionaries above.
        fldCo.pop(0)
        fldCo2.pop(0)
        fldHz.pop(0)
        fldHz2.pop(0)
        fldAll = list()   # [table.field, alias] pairs for MakeQueryTable
        fldAll2 = list()  # plain field names for the insert cursor
        for fld in fldMu:
            fldAll.append(["mapunit." + fld[0], fld[1]])
            fldAll2.append(fld[1])
        for fld in fldCo:
            fldAll.append(["component." + fld[0], fld[1]])
            fldAll2.append(fld[1])
        for fld in fldHz:
            fldAll.append(["chorizon." + fld[0], fld[1]])
            fldAll2.append(fld[1])
        # 'mapunit.objectid = 1' keeps the template query table effectively
        # empty; real records are written below with an insert cursor.
        whereClause = "mapunit.mukey = component.mukey and component.cokey = chorizon.cokey and mapunit.objectid = 1"
        outputTable = os.path.join(outputDB, "QueryTable_HZ")
        arcpy.MakeQueryTable_management(['mapunit', 'component', 'chorizon'], queryTemp, "USE_KEY_FIELDS", "#", fldAll, whereClause)
        if arcpy.Exists(outputTable):
            arcpy.Delete_management(outputTable)
        arcpy.CreateTable_management(outputDB, "QueryTable_HZ", queryTemp)
        arcpy.Delete_management(queryTemp)
        del queryTemp

        # Null-padded stand-ins for missing component / horizon data
        missingCo = ["", None, None, None, None, None, None, None, None, None, None]
        missingHz = ["", None, None, None, None, None, None, None, None, None, None, None, None]

        # Containers for data-population problems found while writing
        muNoCo = list()    # mapunits with no component data
        dNoCo = dict()     # musym/muname for muNoCo, for the report
        muNotCom = list()  # NOTCOM/NOTPUB mapunits (expected to lack components)
        coNoHz = list()    # components with no horizon data
        dNoHz = dict()     # component info for coNoHz, for the report

        # Select component-horizon data for ALL components that have horizon
        # data; downstream calculations may restrict to major-earthy ones.
        arcpy.SetProgressor("step", "Writing data to " + outputTable + "...", 0, len(muList), 1)
        with arcpy.da.InsertCursor(outputTable, fldAll2) as ocur:
            for mukey in muList:
                mrec = dMu[mukey]
                arcpy.SetProgressorPosition()
                try:
                    coVals = dCo[mukey]  # component records for this mapunit
                    coList = sorted(coVals, key=lambda x: int(x[1]))  # by comppct_r
                    for corec in coList:
                        cokey = corec[0]
                        try:
                            hzVals = dHz[cokey]  # horizon records for this component
                            hzList = sorted(hzVals, key=lambda x: int(x[3]))  # by hzdept_r
                            for hzrec in hzList:
                                newrec = [mukey]
                                newrec.extend(mrec)
                                newrec.extend(corec)
                                newrec.extend(hzrec)
                                ocur.insertRow(newrec)
                        except KeyError:
                            # No horizon records: write the component padded with nulls
                            comppct = corec[1]
                            mjrcomp = corec[2]
                            compname = corec[3]
                            compkind = corec[4]
                            newrec = [mukey]
                            newrec.extend(mrec)
                            newrec.extend(corec)
                            newrec.extend(missingHz)
                            ocur.insertRow(newrec)
                            if not (compname in ["NOTCOM", "NOTPUB"] or compkind == 'Miscellaneous area'):
                                # 'Miscellaneous area' is not always an indicator of no HZ data
                                coNoHz.append(cokey)
                                dNoHz[cokey] = [mukey, cokey, compname, compkind, mjrcomp, str(comppct)]
                        except:
                            PrintMsg(" \nhzVals error for " + mukey + ":" + cokey, 2)
                            PrintMsg(" \n" + str(fldAll2), 1)
                            errorMsg()
                except KeyError:
                    # No component records for this map unit: nulls all the way down.
                    # (Was a bare 'except', which also swallowed real errors.)
                    newrec = [mukey]
                    newrec.extend(mrec)
                    newrec.extend(missingCo)
                    newrec.extend(missingHz)
                    ocur.insertRow(newrec)
                    if mrec[0] in ['NOTCOM', 'NOTPUB']:
                        # map units that should never have component data
                        muNotCom.append(mukey)
                    else:
                        muNoCo.append(mukey)
                        dNoCo[mukey] = [mrec[0], mrec[1]]  # musym, muname for the report

        arcpy.ResetProgressor()
        env.workspace = outputDB
        return outputTable, dCr, muList, muNoCo, dNoCo, muNotCom, coNoHz, dNoHz

    except MyError as e:
        PrintMsg(str(e) + " \n", 2)
        # BUG FIX: 'None * 8' raises TypeError; return a tuple of eight Nones
        return (None,) * 8
    except:
        errorMsg()
        return (None,) * 8
## ===================================================================================
def RunReport(outputTable, dCr, muList, muNoCo, dNoCo, muNotCom, coNoHz, dNoHz):
    """Validate QueryTable_HZ and write any problems found to the log file.

    Checks performed:
      * horizon depth gaps/overlaps within each component
      * component restriction depths with no matching horizon top (dCr)
      * map units whose sum of comppct_r is missing, < 75 or > 100
      * map units with no components / components with no horizons (passed in)

    Returns True on success, False on error.
    NOTE(review): relies on module-level globals dPct, logFile, inputDB and
    outputDB being set by the main body -- confirm before reusing elsewhere.
    """
    try:
        # BUG FIX: the passed-in muNoCo/dNoCo/muNotCom/coNoHz/dNoHz collections
        # were previously re-initialized to empty here (leftover from splitting
        # this function out of CreateQueryTables), which silently disabled the
        # "missing component" and "missing horizon" report sections below.

        # dHZ: cokey -> [[top, bot, hzname, mukey, compname, localphase, majcompflag], ...]
        dHZ = dict()
        wc = "hzdept_r is not null and hzdepb_r is not null"
        arcpy.ResetProgressor()
        arcpy.SetProgressorLabel("Looking for inconsistencies in horizon depths...")
        with arcpy.da.SearchCursor(outputTable, ['mukey', 'cokey', 'hzdept_r', 'hzdepb_r', 'hzname', 'compname', 'localphase', 'majcompflag'], where_clause=wc) as cur:
            for rec in cur:
                mukey, cokey, top, bot, hzname, compname, localphase, majcomp = rec
                try:
                    dHZ[cokey].append([top, bot, hzname, mukey, compname, localphase, majcomp])
                except KeyError:
                    dHZ[cokey] = [[top, bot, hzname, mukey, compname, localphase, majcomp]]
                try:
                    if top in dCr[cokey]:
                        # Remove matching horizon tops; depths left in dCr at the
                        # end don't match any CHORIZON hzdept_r value.
                        dCr[cokey] = [t for t in dCr[cokey] if t != top]
                except KeyError:
                    pass  # this component has no restriction records

        # Finished with the query table
        arcpy.Delete_management(outputTable)
        del outputTable

        # Check each component's horizon sequence for gaps and overlaps
        badCoHz = list()      # cokeys with depth inconsistencies
        badHorizons = list()  # formatted report lines
        for cokey, vals in dHZ.items():
            lastBot = vals[0][0]  # start at the top of the first horizon
            localphase = vals[0][5]
            if localphase is None:
                localphase = ""
            else:
                localphase = " " + localphase
            for v in vals:
                # each horizon should start where the previous one ended
                if v[0] != lastBot:
                    diff = v[0] - lastBot
                    badCoHz.append(cokey)
                    # BUG FIX: majcompflag comes from this component's own record
                    # (v[6]); previously a stale cursor-loop variable was used.
                    badHorizons.append(v[3] + ", " + cokey + ", " + v[4] + localphase + ", " + str(v[6]) + ", " + str(v[2]) + ", " + str(v[0]) + ", " + str(diff))
                lastBot = v[1]

        PrintMsg(" \nIdentifying component restrictions with horizon depth discrepancies", 0)
        arcpy.SetProgressor("step", "Identifying component restrictions with horizon depth discrepancies", 0, len(dCr), 1)
        # Drop components whose restriction depths all matched a horizon top.
        # Iterate a snapshot of the keys: popping while iterating items() is unsafe.
        for cokey in list(dCr.keys()):
            if len(dCr[cokey]) == 0:
                dCr.pop(cokey, None)

        # Map units whose sum of comppct_r is missing or outside 75-100
        muBadPct = list()
        for mukey in muList:
            try:
                sumPct = dPct[str(mukey)][0]  # dPct keys are text mukeys
                if sumPct < 75 or sumPct > 100:
                    muBadPct.append(mukey)
            except KeyError:
                # no component-percent data at all; NOTCOMs are expected to lack it
                if not str(mukey) in muNotCom:
                    muBadPct.append(mukey)
        del muList

        # ---- Report data validation failures to the log file ----
        if len(muNoCo) or len(coNoHz) or len(badCoHz) or len(muBadPct) or len(dCr):
            PrintMsg(" \nCreating log file: " + logFile, 1)
            now = datetime.now()
            fh = open(logFile, "w")
            fh.write("\n" + inputDB + "\n")
            fh.write("\nProcessed on " + now.strftime('%A %x %X') + "\n\n")
            fh.write("This log file contains record of any basic data inconsistencies found in the gSSURGO database \n ")
            fh.close()

            # Map units with sum of all components > 100 or < 75
            if len(muBadPct) > 0:
                fh = open(logFile, "a")
                fh.write("\nQuery for map units with sum of comppct_r < 75 or > 100\n")
                fh.write("====================================================================================\n")
                # str() guards against integer mukeys (dPct keys are text, muList may not be)
                fh.write("MUKEY IN ('" + "', '".join([str(m) for m in muBadPct]) + "') \n\n")
                fh.close()
                PrintMsg(" \nMap units (" + Number_Format(len(muBadPct), 0, True) + ") with sum of comppct_r less than 75 or greater than 100 saved to log file", 0)

            # Map units with no components (some of these will be NOTCOMs)
            if len(muNoCo) > 0:
                fh = open(logFile, "a")
                fh.write("\nQuery for map units missing component data\n")
                fh.write("====================================================================================\n")
                fh.write("MUKEY IN ('" + "', '".join(muNoCo) + "') \n\n")
                fh.write("\n\nTable of map units missing component data\n")
                fh.write("\nMUKEY, MUSYM, MUNAME\n")
                for mukey in muNoCo:
                    fh.write(mukey + ", " + dNoCo[mukey][0] + ", " + dNoCo[mukey][1] + "\n")
                fh.close()
                PrintMsg(" \nMap units missing component data (" + Number_Format(len(muNoCo), 0, True) + ") saved to logfile", 0)

            # Components with no horizons.  NOTE: these COKEYs work with
            # gSSURGO but not Soil Data Access.
            if len(coNoHz) > 0:
                PrintMsg(" \nQuery for components missing horizon data (" + Number_Format(len(coNoHz), 0, True) + ") saved to logfile", 0)
                fh = open(logFile, "a")
                fh.write("\n\nQuery for components with no horizon data\n")
                fh.write("====================================================================================\n")
                fh.write("COKEY IN ('" + "', '".join(coNoHz) + "') \n\n")
                fh.write("Table of components that are completely missing horizon data\n")
                fh.write("\nMUKEY, COKEY, COMPNAME, MAJCOMPFLAG, COMPKIND, MAJCOMPFLAG, COMPPCT, COMPKIND\n")
                for mukey, compInfo in dNoHz.items():
                    mukey, cokey, compname, compkind, majcomp, comppct = compInfo
                    fh.write(mukey + ", " + cokey + ", " + compname + ", " + str(compkind) + ", " + majcomp + ", " + str(comppct) + ", " + str(compkind) + "\n")
                fh.close()

            # Components with horizon gaps or overlaps
            if len(badCoHz) > 0:
                PrintMsg(" \nComponents with horizon gaps or overlaps (" + Number_Format(len(badCoHz), 0, True) + ") saved to:\t" + logFile, 0)
                fh = open(logFile, "a")
                fh.write("\n\nQuery for components with horizon gaps or overlaps\n")
                fh.write("====================================================================================\n")
                fh.write("COKEY IN ('" + "', '".join(badCoHz) + "') \n\n")
                fh.write("\nTable of components with horizon gaps or overlaps\n")
                fh.write("MUKEY, COKEY, COMPNAME, MJRCOMP, HZNAME, HZDEPT, DIFF\n")
                for h in badHorizons:
                    fh.write(h + "\n")
                fh.close()

            # Components whose restriction depths don't line up with any horizon.
            # Components with restrictions but no horizon data get flagged too.
            if len(dCr) > 0:
                PrintMsg(" \nComponents with horizon restriction discrepancies (" + Number_Format(len(dCr), 0, True) + ") saved to:\t" + logFile, 0)
                fh = open(logFile, "a")
                fh.write("\n\nQuery for components with horizon restriction discrepancies\n")
                fh.write("====================================================================================\n")
                fh.write("COKEY IN ('" + "', '".join(dCr.keys()) + "') \n\n")
                fh.write("\nTable of components with restriction depth discrepancies\n")
                fh.write("COKEY, HZDEPT\n")
                for cokey, hzdept in dCr.items():
                    fh.write(cokey + ", " + str(hzdept) + "\n")
                fh.close()

            os.startfile(logFile)  # Windows-only; opens the log for review
        else:
            PrintMsg(" \nNo data validation issues detected", 0)

        arcpy.ResetProgressor()
        env.workspace = outputDB
        return True

    except MyError as e:
        PrintMsg(str(e) + " \n", 2)
        return False
    except:
        errorMsg()
        return False
## ===================================================================================
## ====================================== Main Body ==================================
# Import modules
import os, sys, string, re, locale, arcpy, traceback, collections
from arcpy import env
from datetime import datetime
try:
    # BUG FIX: 'arcpy.OverwriteOutput = True' only set a meaningless attribute
    # on the arcpy module; the real setting lives on the environment object.
    env.overwriteOutput = True
    inputDB = arcpy.GetParameterAsText(0)  # Input gSSURGO database

    # Temporary tables go to the scratch geodatabase
    outputDB = env.scratchGDB

    # Output table names (module-level globals used by the functions above)
    theMuTable = os.path.join(inputDB, "MuTest")    # mapunit-level output
    theCompTable = os.path.join(inputDB, "Co_Test") # component-level output

    env.workspace = inputDB

    # Problem log lives next to the input geodatabase, named after it
    logFile = os.path.basename(inputDB)[:-4] + "_Problems.txt"
    logFile = os.path.join(os.path.dirname(inputDB), logFile)

    # Map unit -> summed component percents, used by the validation report
    dPct = GetSumPct(inputDB)
    if len(dPct) == 0:
        # GetSumPct already reported the underlying error
        raise MyError("")

    # Build QueryTable_HZ plus the missing-data bookkeeping, then report
    outputTable, dCr, muList, muNoCo, dNoCo, muNotCom, coNoHz, dNoHz = CreateQueryTables(inputDB, outputDB, 150.0, dPct)
    if outputTable is None:
        raise MyError("")

    bReport = RunReport(outputTable, dCr, muList, muNoCo, dNoCo, muNotCom, coNoHz, dNoHz)
    if bReport:
        PrintMsg(" \nValidation process complete for " + inputDB, 0)

except MyError as e:
    # BUG FIX: 'raise MyError, ""' / 'except MyError, e' are Python-2-only forms
    PrintMsg(str(e) + " \n", 2)
except:
    errorMsg()