forked from Phil-Factor/ParseJSON
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathParseJSON.sql
276 lines (270 loc) · 12.7 KB
/
ParseJSON.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
Alter FUNCTION dbo.parseJSON( @JSON NVARCHAR(MAX))
/**
Summary: >
The code for the JSON Parser/Shredder will run in SQL Server 2005,
and even in SQL Server 2000 (with some modifications required).
First the function replaces all strings with tokens of the form @Stringxx,
where xx is the foreign key of the table variable where the strings are held.
This takes them, and their potentially difficult embedded brackets, out of
the way. Names are always strings in JSON as well as string values.
Then, the routine iteratively finds the next structure that has no structure
Contained within it, (and is, by definition the leaf structure), and parses it,
replacing it with an object token of the form ‘@Objectxxx‘, or ‘@arrayxxx‘,
where xxx is the object id assigned to it. The values, or name/value pairs
are retrieved from the string table and stored in the hierarchy table. G
radually, the JSON document is eaten until there is just a single root
object left.
Author: PhilFactor
Date: 01/07/2010
Version:
Number: 4.6.2
Date: 01/07/2019
Why: case-insensitive version
Example: >
Select * from parseJSON('{ "Person":
{
"firstName": "John",
"lastName": "Smith",
"age": 25,
"Address":
{
"streetAddress":"21 2nd Street",
"city":"New York",
"state":"NY",
"postalCode":"10021"
},
"PhoneNumbers":
{
"home":"212 555-1234",
"fax":"646 555-4567"
}
}
}
')
Returns: >
nothing
**/
RETURNS @hierarchy TABLE
(
Element_ID INT IDENTITY(1, 1) NOT NULL, /* internal surrogate primary key gives the order of parsing and the list order */
SequenceNo [int] NULL, /* the place in the sequence for the element */
Parent_ID INT null, /* if the element has a parent then it is in this column. The document is the ultimate parent, so you can get the structure from recursing from the document */
Object_ID INT null, /* each list or object has an object id. This ties all elements to a parent. Lists are treated as objects here */
Name NVARCHAR(2000) NULL, /* the Name of the object */
StringValue NVARCHAR(MAX) NOT NULL,/*the string representation of the value of the element. */
ValueType VARCHAR(10) NOT null /* the declared type of the value represented as a string in StringValue*/
)
/*
*/
AS
BEGIN
DECLARE
@FirstObject INT, --the index of the first open bracket found in the JSON string
@OpenDelimiter INT,--the index of the next open bracket found in the JSON string
@NextOpenDelimiter INT,--the index of subsequent open bracket found in the JSON string
@NextCloseDelimiter INT,--the index of subsequent close bracket found in the JSON string
@Type NVARCHAR(10),--whether it denotes an object or an array
@NextCloseDelimiterChar CHAR(1),--either a '}' or a ']'
@Contents NVARCHAR(MAX), --the unparsed contents of the bracketed expression
@Start INT, --index of the start of the token that you are parsing
@end INT,--index of the end of the token that you are parsing
@param INT,--the parameter at the end of the next Object/Array token
@EndOfName INT,--the index of the start of the parameter at end of Object/Array token
@token NVARCHAR(200),--either a string or object
@value NVARCHAR(MAX), -- the value as a string
@SequenceNo int, -- the sequence number within a list
@Name NVARCHAR(200), --the Name as a string
@Parent_ID INT,--the next parent ID to allocate
@lenJSON INT,--the current length of the JSON String
@characters NCHAR(36),--used to convert hex to decimal
@result BIGINT,--the value of the hex symbol being parsed
@index SMALLINT,--used for parsing the hex value
@Escape INT --the index of the next escape character
DECLARE @Strings TABLE /* in this temporary table we keep all strings, even the Names of the elements, since they are 'escaped' in a different way, and may contain, unescaped, brackets denoting objects or lists. These are replaced in the JSON string by tokens representing the string */
(
String_ID INT IDENTITY(1, 1),
StringValue NVARCHAR(MAX)
)
SELECT--initialise the characters to convert hex to ascii
@characters='0123456789abcdefghijklmnopqrstuvwxyz',
@SequenceNo=0, --set the sequence no. to something sensible.
/* firstly we process all strings. This is done because [{} and ] aren't escaped in strings, which complicates an iterative parse. */
@Parent_ID=0;
WHILE 1=1 --forever until there is nothing more to do
BEGIN
SELECT
@start=PATINDEX('%[^a-zA-Z]["]%', @json collate SQL_Latin1_General_CP850_Bin);--next delimited string
IF @start=0 BREAK --no more so drop through the WHILE loop
IF SUBSTRING(@json, @start+1, 1)='"'
BEGIN --Delimited Name
SET @start=@Start+1;
SET @end=PATINDEX('%[^\]["]%', RIGHT(@json, LEN(@json+'|')-@start) collate SQL_Latin1_General_CP850_Bin);
END
IF @end=0 --either the end or no end delimiter to last string
BEGIN-- check if ending with a double slash...
SET @end=PATINDEX('%[\][\]["]%', RIGHT(@json, LEN(@json+'|')-@start) collate SQL_Latin1_General_CP850_Bin);
IF @end=0 --we really have reached the end
BEGIN
BREAK --assume all tokens found
END
END
SELECT @token=SUBSTRING(@json, @start+1, @end-1)
--now put in the escaped control characters
SELECT @token=REPLACE(@token, FromString, ToString)
FROM
(SELECT '\b', CHAR(08)
UNION ALL SELECT '\f', CHAR(12)
UNION ALL SELECT '\n', CHAR(10)
UNION ALL SELECT '\r', CHAR(13)
UNION ALL SELECT '\t', CHAR(09)
UNION ALL SELECT '\"', '"'
UNION ALL SELECT '\/', '/'
) substitutions(FromString, ToString)
SELECT @token=Replace(@token, '\\', '\')
SELECT @result=0, @escape=1
--Begin to take out any hex escape codes
WHILE @escape>0
BEGIN
SELECT @index=0,
--find the next hex escape sequence
@escape=PATINDEX('%\x[0-9a-f][0-9a-f][0-9a-f][0-9a-f]%', @token collate SQL_Latin1_General_CP850_Bin)
IF @escape>0 --if there is one
BEGIN
WHILE @index<4 --there are always four digits to a \x sequence
BEGIN
SELECT --determine its value
@result=@result+POWER(16, @index)
*(CHARINDEX(SUBSTRING(@token, @escape+2+3-@index, 1),
@characters)-1), @index=@index+1 ;
END
-- and replace the hex sequence by its unicode value
SELECT @token=STUFF(@token, @escape, 6, NCHAR(@result))
END
END
--now store the string away
INSERT INTO @Strings (StringValue) SELECT @token
-- and replace the string with a token
SELECT @JSON=STUFF(@json, @start, @end+1,
'@string'+CONVERT(NCHAR(5), @@identity))
END
-- all strings are now removed. Now we find the first leaf.
WHILE 1=1 --forever until there is nothing more to do
BEGIN
SELECT @Parent_ID=@Parent_ID+1
--find the first object or list by looking for the open bracket
SELECT @FirstObject=PATINDEX('%[{[[]%', @json collate SQL_Latin1_General_CP850_Bin)--object or array
IF @FirstObject = 0 BREAK
IF (SUBSTRING(@json, @FirstObject, 1)='{')
SELECT @NextCloseDelimiterChar='}', @type='object'
ELSE
SELECT @NextCloseDelimiterChar=']', @type='array'
SELECT @OpenDelimiter=@firstObject
WHILE 1=1 --find the innermost object or list...
BEGIN
SELECT
@lenJSON=LEN(@JSON+'|')-1
--find the matching close-delimiter proceeding after the open-delimiter
SELECT
@NextCloseDelimiter=CHARINDEX(@NextCloseDelimiterChar, @json,
@OpenDelimiter+1)
--is there an intervening open-delimiter of either type
SELECT @NextOpenDelimiter=PATINDEX('%[{[[]%',
RIGHT(@json, @lenJSON-@OpenDelimiter)collate SQL_Latin1_General_CP850_Bin)--object
IF @NextOpenDelimiter=0
BREAK
SELECT @NextOpenDelimiter=@NextOpenDelimiter+@OpenDelimiter
IF @NextCloseDelimiter<@NextOpenDelimiter
BREAK
IF SUBSTRING(@json, @NextOpenDelimiter, 1)='{'
SELECT @NextCloseDelimiterChar='}', @type='object'
ELSE
SELECT @NextCloseDelimiterChar=']', @type='array'
SELECT @OpenDelimiter=@NextOpenDelimiter
END
---and parse out the list or Name/value pairs
SELECT
@contents=SUBSTRING(@json, @OpenDelimiter+1,
@NextCloseDelimiter-@OpenDelimiter-1)
SELECT
@JSON=STUFF(@json, @OpenDelimiter,
@NextCloseDelimiter-@OpenDelimiter+1,
'@'+@type+CONVERT(NCHAR(5), @Parent_ID))
WHILE (PATINDEX('%[A-Za-z0-9@+.e]%', @contents collate SQL_Latin1_General_CP850_Bin))<>0
BEGIN
IF @Type='object' --it will be a 0-n list containing a string followed by a string, number,boolean, or null
BEGIN
SELECT
@SequenceNo=0,@end=CHARINDEX(':', ' '+@contents)--if there is anything, it will be a string-based Name.
SELECT @start=PATINDEX('%[^A-Za-z@][@]%', ' '+@contents collate SQL_Latin1_General_CP850_Bin)--AAAAAAAA
SELECT @token=RTrim(Substring(' '+@contents, @start+1, @End-@Start-1)),
@endofName=PATINDEX('%[0-9]%', @token collate SQL_Latin1_General_CP850_Bin),
@param=RIGHT(@token, LEN(@token)-@endofName+1)
SELECT
@token=LEFT(@token, @endofName-1),
@Contents=RIGHT(' '+@contents, LEN(' '+@contents+'|')-@end-1)
SELECT @Name=StringValue FROM @strings
WHERE string_id=@param --fetch the Name
END
ELSE
SELECT @Name=null,@SequenceNo=@SequenceNo+1
SELECT
@end=CHARINDEX(',', @contents)-- a string-token, object-token, list-token, number,boolean, or null
IF @end=0
--HR Engineering notation bugfix start
IF ISNUMERIC(@contents) = 1
SELECT @end = LEN(@contents) + 1
Else
--HR Engineering notation bugfix end
SELECT @end=PATINDEX('%[A-Za-z0-9@+.e][^A-Za-z0-9@+.e]%', @contents+' ' collate SQL_Latin1_General_CP850_Bin) + 1
SELECT
@start=PATINDEX('%[^A-Za-z0-9@+.e][A-Za-z0-9@+.e]%', ' '+@contents collate SQL_Latin1_General_CP850_Bin)
--select @start,@end, LEN(@contents+'|'), @contents
SELECT
@Value=RTRIM(SUBSTRING(@contents, @start, @End-@Start)),
@Contents=RIGHT(@contents+' ', LEN(@contents+'|')-@end)
IF SUBSTRING(@value, 1, 7)='@object'
INSERT INTO @hierarchy
(Name, SequenceNo, Parent_ID, StringValue, Object_ID, ValueType)
SELECT @Name, @SequenceNo, @Parent_ID, SUBSTRING(@value, 8, 5),
SUBSTRING(@value, 8, 5), 'object'
ELSE
IF SUBSTRING(@value, 1, 6)='@array'
INSERT INTO @hierarchy
(Name, SequenceNo, Parent_ID, StringValue, Object_ID, ValueType)
SELECT @Name, @SequenceNo, @Parent_ID, SUBSTRING(@value, 7, 5),
SUBSTRING(@value, 7, 5), 'array'
ELSE
IF SUBSTRING(@value, 1, 7)='@string'
INSERT INTO @hierarchy
(Name, SequenceNo, Parent_ID, StringValue, ValueType)
SELECT @Name, @SequenceNo, @Parent_ID, StringValue, 'string'
FROM @strings
WHERE string_id=SUBSTRING(@value, 8, 5)
ELSE
IF @value IN ('true', 'false')
INSERT INTO @hierarchy
(Name, SequenceNo, Parent_ID, StringValue, ValueType)
SELECT @Name, @SequenceNo, @Parent_ID, @value, 'boolean'
ELSE
IF @value='null'
INSERT INTO @hierarchy
(Name, SequenceNo, Parent_ID, StringValue, ValueType)
SELECT @Name, @SequenceNo, @Parent_ID, @value, 'null'
ELSE
IF PATINDEX('%[^0-9]%', @value collate SQL_Latin1_General_CP850_Bin)>0
INSERT INTO @hierarchy
(Name, SequenceNo, Parent_ID, StringValue, ValueType)
SELECT @Name, @SequenceNo, @Parent_ID, @value, 'real'
ELSE
INSERT INTO @hierarchy
(Name, SequenceNo, Parent_ID, StringValue, ValueType)
SELECT @Name, @SequenceNo, @Parent_ID, @value, 'int'
if @Contents=' ' Select @SequenceNo=0
END
END
INSERT INTO @hierarchy (Name, SequenceNo, Parent_ID, StringValue, Object_ID, ValueType)
SELECT '-',1, NULL, '', @Parent_ID-1, @type
--
RETURN
END
GO