MOD Avancement sur le passage à JSON
[euphorik.git] / modules / erl / json.erl
1 %%% Copyright (c) 2005-2006, A2Z Development USA, Inc. All Rights Reserved.
2 %%%
3 %%% The contents of this file are subject to the Erlang Public License,
4 %%% Version 1.1, (the "License"); you may not use this file except in
5 %%% compliance with the License. You should have received a copy of the
6 %%% Erlang Public License along with this software. If not, it can be
7 %%% retrieved via the world wide web at http://www.erlang.org/.
8 %%%
9 %%% Software distributed under the License is distributed on an "AS IS"
10 %%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
11 %%% the License for the specific language governing rights and limitations
12 %%% under the License.
13 %%%
14 %%% The Initial Developer of the Original Code is A2Z Development USA, Inc.
15 %%% All Rights Reserved.
16
17 -module(json).
18 -export([encode/1, decode_string/1, decode/2]).
19 -export([is_obj/1, obj_new/0, obj_fetch/2, obj_find/2, obj_is_key/2]).
20 -export([obj_store/3, obj_from_list/1, obj_fold/3]).
21 -export([test/0]).
22 -author("Jim Larson <jalarson@amazon.com>, Robert Wai-Chi Chu <robchu@amazon.com>").
23 -vsn("1").
24
25 %%% JavaScript Object Notation ("JSON", http://www.json.org) is a simple
26 %%% data syntax meant as a lightweight alternative to other representations,
27 %%% such as XML. JSON is natively supported by JavaScript, but many
28 %%% other languages have conversion libraries available.
29 %%%
30 %%% This module translates JSON types into the following Erlang types:
31 %%%
32 %%% JSON Erlang
33 %%% ---- ------
34 %%% number number
35 %%% string string
36 %%% array tuple
37 %%% object tagged proplist with string (or atom) keys
38 %%% true, false, null atoms 'true', 'false', and 'null'
39 %%%
40 %%% Character Sets: the external representation, and the internal
41 %%% representation of strings, are lists of UTF-16 code units.
42 %%% The encoding of supplementary characters, as well as
43 %%% transcoding to other schemes, such as UTF-8, can be provided
44 %%% by other modules. (See discussion at
45 %%% http://groups.yahoo.com/group/json/message/52)
46 %%%
47 %%% Numbers: Thanks to Erlang's bignums, JSON-encoded integers of any
48 %%% size can be parsed. Conversely, extremely large integers may
49 %%% be JSON-encoded. This may cause problems for interoperability
50 %%% with JSON parsers which can't handle arbitrary-sized integers.
51 %%% Erlang's floats are of fixed precision and limited range, so
52 %%% syntactically valid JSON floating-point numbers could silently
53 %%% lose precision or noisily cause an overflow. However, most
54 %%% other JSON libraries are likely to behave in the same way.
55 %%% The encoding precision defaults to 6 digits.
56 %%%
57 %%% Strings: If we represented JSON string data as Erlang binaries,
58 %%% we would have to choose a particular unicode format. Instead,
59 %%% we use lists of UTF-16 code units, which applications may then
60 %%% change to binaries in their application-preferred manner.
61 %%%
62 %%% Arrays: Because of the string decision above, and Erlang's
63 %%% lack of a distinguished string datatype, JSON arrays map
64 %%% to Erlang tuples. Consider utilities like tuple_fold/3
65 %%% to deal with tuples in their native form.
66 %%%
67 %%% Objects: Though not explicitly stated in the JSON "spec",
68 %%% JSON's JavaScript heritage mandates that member names must
69 %%% be unique within an object. The object/tuple ambiguity is
70 %%% not a problem, since the atom 'json_object' is not an
71 %%% allowable value. Object keys may be atoms or strings on
72 %%% encoding but are always decoded as strings.
73
74 %%% ENCODING
75
76 %% Encode an erlang number, string, tuple, or object to JSON syntax, as a
77 %% possibly deep list of UTF-16 code units, throwing a runtime error in the
78 %% case of un-convertible input.
79 %% Note: object keys may be either strings or atoms.
80
81 encode(true) -> "true";
82 encode(false) -> "false";
83 encode(null) -> "null";
84 encode(I) when is_integer(I) -> integer_to_list(I);
85 encode(F) when is_float(F) -> io_lib:format("~g", [F]);
86 encode(L) when is_list(L) -> encode_string(L);
87 encode({}) -> "[]";
88 encode({json_object, Props} = T) when is_list(Props) -> encode_object(T);
89 encode(T) when is_tuple(T) -> encode_array(T);
90 encode(Bad) -> exit({json_encode, {bad_term, Bad}}).
91
92 %% Encode an Erlang string to JSON.
93 %% Accumulate strings in reverse.
94
95 encode_string(S) -> encode_string(S, [$"]).
96
97 encode_string([], Acc) -> lists:reverse([$" | Acc]);
98 encode_string([C | Cs], Acc) ->
99 case C of
100 $" -> encode_string(Cs, [$", $\\ | Acc]);
101 % (don't escape solidus on encode)
102 $\\ -> encode_string(Cs, [$\\, $\\ | Acc]);
103 $\b -> encode_string(Cs, [$b, $\\ | Acc]); % note missing \
104 $\f -> encode_string(Cs, [$f, $\\ | Acc]);
105 $\n -> encode_string(Cs, [$n, $\\ | Acc]);
106 $\r -> encode_string(Cs, [$r, $\\ | Acc]);
107 $\t -> encode_string(Cs, [$t, $\\ | Acc]);
108 C when C >= 0, C < $\s ->
109 % Control characters must be unicode-encoded.
110 Hex = lists:flatten(io_lib:format("~4.16.0b", [C])),
111 encode_string(Cs, lists:reverse(Hex) ++ "u\\" ++ Acc);
112 C when C =< 16#FFFF -> encode_string(Cs, [C | Acc]);
113 _ -> exit({json_encode, {bad_char, C}})
114 end.
115
116 %% Encode an Erlang object as a JSON object, allowing string or atom keys.
117 %% Note that order is irrelevant in both internal and external object
118 %% representations. Nevertheless, the output will respect the order
119 %% of the input.
120
121 encode_object({json_object, _Props} = Obj) ->
122 M = obj_fold(fun({Key, Value}, Acc) ->
123 S = case Key of
124 L when is_list(L) -> encode_string(L);
125 A when is_atom(A) -> encode_string(atom_to_list(A));
126 _ -> exit({json_encode, {bad_key, Key}})
127 end,
128 V = encode(Value),
129 case Acc of
130 [] -> [S, $:, V];
131 _ -> [Acc, $,, S, $:, V]
132 end
133 end, [], Obj),
134 [${, M, $}].
135
136 %% Encode an Erlang tuple as a JSON array.
137 %% Order *is* significant in a JSON array!
138
139 encode_array(T) ->
140 M = tuple_fold(fun(E, Acc) ->
141 V = encode(E),
142 case Acc of
143 [] -> V;
144 _ -> [Acc, $,, V]
145 end
146 end, [], T),
147 [$[, M, $]].
148
149 %% A fold function for tuples (left-to-right).
150 %% Folded function takes arguments (Element, Accumulator).
151
152 tuple_fold(F, A, T) when is_tuple(T) ->
153 tuple_fold(F, A, T, 1, size(T)).
154
155 tuple_fold(_F, A, _T, I, N) when I > N ->
156 A;
157 tuple_fold(F, A, T, I, N) ->
158 A2 = F(element(I, T), A),
159 tuple_fold(F, A2, T, I + 1, N).
160
161 %%% SCANNING
162 %%%
163 %%% Scanning funs return either:
164 %%% {done, Result, LeftOverChars}
165 %%% if a complete token is recognized, or
166 %%% {more, Continuation}
167 %%% if more input is needed.
168 %%% Result is {ok, Term}, 'eof', or {error, Reason}.
169 %%% Here, the Continuation is a simple Erlang string.
170 %%%
171 %%% Currently, error handling is rather crude - errors are recognized
172 %%% by match failures. EOF is handled only by number scanning, where
173 %%% it can delimit a number, and otherwise causes a match failure.
174 %%%
175 %%% Tokens are one of the following
176 %%% JSON string -> erlang string
177 %%% JSON number -> erlang number
178 %%% true, false, null -> erlang atoms
179 %%% { } [ ] : , -> lcbrace rcbrace lsbrace rsbrace colon comma
180
181 token([]) -> {more, []};
182 token(eof) -> {done, eof, []};
183
184 token("true" ++ Rest) -> {done, {ok, true}, Rest};
185 token("tru") -> {more, "tru"};
186 token("tr") -> {more, "tr"};
187 token("t") -> {more, "t"};
188
189 token("false" ++ Rest) -> {done, {ok, false}, Rest};
190 token("fals") -> {more, "fals"};
191 token("fal") -> {more, "fal"};
192 token("fa") -> {more, "fa"};
193 token("f") -> {more, "f"};
194
195 token("null" ++ Rest) -> {done, {ok, null}, Rest};
196 token("nul") -> {more, "nul"};
197 token("nu") -> {more, "nu"};
198 token("n") -> {more, "n"};
199
200 token([C | Cs] = Input) ->
201 case C of
202 $\s -> token(Cs); % eat whitespace
203 $\t -> token(Cs); % eat whitespace
204 $\n -> token(Cs); % eat whitespace
205 $\r -> token(Cs); % eat whitespace
206 $" -> scan_string(Input);
207 $- -> scan_number(Input);
208 D when D >= $0, D =< $9-> scan_number(Input);
209 ${ -> {done, {ok, lcbrace}, Cs};
210 $} -> {done, {ok, rcbrace}, Cs};
211 $[ -> {done, {ok, lsbrace}, Cs};
212 $] -> {done, {ok, rsbrace}, Cs};
213 $: -> {done, {ok, colon}, Cs};
214 $, -> {done, {ok, comma}, Cs};
215 $/ -> case scan_comment(Cs) of
216 {more, X} -> {more, X};
217 {done, _, Chars} -> token(Chars)
218 end;
219 _ -> {done, {error, {bad_char, C}}, Cs}
220 end.
221
222 scan_string([$" | Cs] = Input) ->
223 scan_string(Cs, [], Input).
224
225 %% Accumulate in reverse order, save original start-of-string for continuation.
226
227 scan_string([], _, X) -> {more, X};
228 scan_string(eof, _, X) -> {done, {error, missing_close_quote}, X};
229 scan_string([$" | Rest], A, _) -> {done, {ok, lists:reverse(A)}, Rest};
230 scan_string([$\\], _, X) -> {more, X};
231 scan_string([$\\, $u, U1, U2, U3, U4 | Rest], A, X) ->
232 scan_string(Rest, [uni_char([U1, U2, U3, U4]) | A], X);
233 scan_string([$\\, $u | _], _, X) -> {more, X};
234 scan_string([$\\, C | Rest], A, X) ->
235 scan_string(Rest, [esc_to_char(C) | A], X);
236 scan_string([C | Rest], A, X) ->
237 scan_string(Rest, [C | A], X).
238
239 %% Given a list of hex characters, convert to the corresponding integer.
240
241 uni_char(HexList) ->
242 erlang:list_to_integer(HexList, 16).
243
244 esc_to_char($") -> $";
245 esc_to_char($/) -> $/;
246 esc_to_char($\\) -> $\\;
247 esc_to_char($b) -> $\b;
248 esc_to_char($f) -> $\f;
249 esc_to_char($n) -> $\n;
250 esc_to_char($r) -> $\r;
251 esc_to_char($t) -> $\t.
252
253 scan_number([]) -> {more, []};
254 scan_number(eof) -> {done, {error, incomplete_number}, []};
255 scan_number([$- | Ds] = Input) ->
256 case scan_number(Ds) of
257 {more, _Cont} -> {more, Input};
258 {done, {ok, N}, CharList} -> {done, {ok, -1 * N}, CharList};
259 {done, Other, Chars} -> {done, Other, Chars}
260 end;
261 scan_number([D | Ds] = Input) when D >= $0, D =< $9 ->
262 scan_number(Ds, D - $0, Input).
263
264 %% Numbers don't have a terminator, so stop at the first non-digit,
265 %% and ask for more if we run out.
266
267 scan_number([], _A, X) -> {more, X};
268 scan_number(eof, A, _X) -> {done, {ok, A}, eof};
269 scan_number([$.], _A, X) -> {more, X};
270 scan_number([$., D | Ds], A, X) when D >= $0, D =< $9 ->
271 scan_fraction([D | Ds], A, X);
272 scan_number([D | Ds], A, X) when A > 0, D >= $0, D =< $9 ->
273 % Note that nonzero numbers can't start with "0".
274 scan_number(Ds, 10 * A + (D - $0), X);
275 scan_number([D | Ds], A, X) when D == $E; D == $e ->
276 scan_exponent_begin(Ds, float(A), X);
277 scan_number([D | _] = Ds, A, _X) when D < $0; D > $9 ->
278 {done, {ok, A}, Ds}.
279
280 scan_fraction(Ds, I, X) -> scan_fraction(Ds, [], I, X).
281
282 scan_fraction([], _Fs, _I, X) -> {more, X};
283 scan_fraction(eof, Fs, I, _X) ->
284 R = I + list_to_float("0." ++ lists:reverse(Fs)),
285 {done, {ok, R}, eof};
286 scan_fraction([D | Ds], Fs, I, X) when D >= $0, D =< $9 ->
287 scan_fraction(Ds, [D | Fs], I, X);
288 scan_fraction([D | Ds], Fs, I, X) when D == $E; D == $e ->
289 R = I + list_to_float("0." ++ lists:reverse(Fs)),
290 scan_exponent_begin(Ds, R, X);
291 scan_fraction(Rest, Fs, I, _X) ->
292 R = I + list_to_float("0." ++ lists:reverse(Fs)),
293 {done, {ok, R}, Rest}.
294
295 scan_exponent_begin(Ds, R, X) ->
296 scan_exponent_begin(Ds, [], R, X).
297
298 scan_exponent_begin([], _Es, _R, X) -> {more, X};
299 scan_exponent_begin(eof, _Es, _R, X) -> {done, {error, missing_exponent}, X};
300 scan_exponent_begin([D | Ds], Es, R, X) when D == $-;
301 D == $+;
302 D >= $0, D =< $9 ->
303 scan_exponent(Ds, [D | Es], R, X).
304
305 scan_exponent([], _Es, _R, X) -> {more, X};
306 scan_exponent(eof, Es, R, _X) ->
307 X = R * math:pow(10, list_to_integer(lists:reverse(Es))),
308 {done, {ok, X}, eof};
309 scan_exponent([D | Ds], Es, R, X) when D >= $0, D =< $9 ->
310 scan_exponent(Ds, [D | Es], R, X);
311 scan_exponent(Rest, Es, R, _X) ->
312 X = R * math:pow(10, list_to_integer(lists:reverse(Es))),
313 {done, {ok, X}, Rest}.
314
315 scan_comment([]) -> {more, "/"};
316 scan_comment(eof) -> {done, eof, []};
317 scan_comment([$/ | Rest]) -> scan_cpp_comment(Rest);
318 scan_comment([$* | Rest]) -> scan_c_comment(Rest).
319
320 %% Ignore up to next CR or LF. If the line ends in CRLF,
321 %% the LF will be treated as separate whitespace, which is
322 %% okay since it will also be ignored.
323
324 scan_cpp_comment([]) -> {more, "//"};
325 scan_cpp_comment(eof) -> {done, eof, []};
326 scan_cpp_comment([$\r | Rest]) -> {done, [], Rest};
327 scan_cpp_comment([$\n | Rest]) -> {done, [], Rest};
328 scan_cpp_comment([_ | Rest]) -> scan_cpp_comment(Rest).
329
330 scan_c_comment([]) -> {more, "/*"};
331 scan_c_comment(eof) -> {done, eof, []};
332 scan_c_comment([$*]) -> {more, "/**"};
333 scan_c_comment([$*, $/ | Rest]) -> {done, [], Rest};
334 scan_c_comment([_ | Rest]) -> scan_c_comment(Rest).
335
336 %%% PARSING
337 %%%
338 %%% The decode function takes a char list as input, but
339 %%% interprets the end of the list as only an end to the available
340 %%% input, and returns a "continuation" requesting more input.
341 %%% When additional characters are available, they, and the
342 %%% continuation, are fed into decode/2. You can use the atom 'eof'
343 %%% as a character to signal a true end to the input stream, and
344 %%% possibly flush out an unfinished number. The decode_string/1
345 %%% function appends 'eof' to its input and calls decode/1.
346 %%%
347 %%% Parsing and scanning errors are handled only by match failures.
348 %%% The external caller must take care to wrap the call in a "catch"
349 %%% or "try" if better error-handling is desired. Eventually parse
350 %%% or scan errors will be returned explicitly with a description,
351 %%% and someday with line numbers too.
352 %%%
353 %%% The parsing code uses a continuation-passing style to allow
354 %%% for the parsing to suspend at any point and be resumed when
355 %%% more input is available.
356 %%% See http://en.wikipedia.org/wiki/Continuation_passing_style
357
358 %% Return the first JSON value decoded from the input string.
359 %% The string must contain at least one complete JSON value.
360
361 decode_string(CharList) ->
362 {done, V, _} = decode([], CharList ++ eof),
363 V.
364
365 %% Attempt to decode a JSON value from the input string
366 %% and continuation, using empty list for the initial continuation.
367 %% Return {done, Result, LeftoverChars} if a value is recognized,
368 %% or {more, Continuation} if more input characters are needed.
369 %% The Result can be {ok, Value}, eof, or {error, Reason}.
370 %% The Continuation is then fed as an argument to decode/2 when
371 %% more input is available.
372 %% Use the atom 'eof' instead of a char list to signal
373 %% a true end to the input, and may flush a final number.
374
375 decode([], CharList) ->
376 decode(first_continuation(), CharList);
377
378 decode(Continuation, CharList) ->
379 {OldChars, Kt} = Continuation,
380 get_token(OldChars ++ CharList, Kt).
381
382 first_continuation() ->
383 {[], fun
384 (eof, Cs) ->
385 {done, eof, Cs};
386 (T, Cs) ->
387 parse_value(T, Cs, fun(V, C2) ->
388 {done, {ok, V}, C2}
389 end)
390 end}.
391
392 %% Continuation Kt must accept (TokenOrEof, Chars)
393
394 get_token(Chars, Kt) ->
395 case token(Chars) of
396 {done, {ok, T}, Rest} -> Kt(T, Rest);
397 {done, eof, Rest} -> Kt(eof, Rest);
398 {done, {error, Reason}, Rest} -> {done, {error, Reason}, Rest};
399 {more, X} -> {more, {X, Kt}}
400 end.
401
402 %% Continuation Kv must accept (Value, Chars)
403
404 parse_value(eof, C, _Kv) -> {done, {error, premature_eof}, C};
405 parse_value(true, C, Kv) -> Kv(true, C);
406 parse_value(false, C, Kv) -> Kv(false, C);
407 parse_value(null, C, Kv) -> Kv(null, C);
408 parse_value(S, C, Kv) when is_list(S) -> Kv(S, C);
409 parse_value(N, C, Kv) when is_number(N) -> Kv(N, C);
410 parse_value(lcbrace, C, Kv) -> parse_object(C, Kv);
411 parse_value(lsbrace, C, Kv) -> parse_array(C, Kv);
412 parse_value(_, C, _Kv) -> {done, {error, syntax_error}, C}.
413
414 %% Continuation Kv must accept (Value, Chars)
415
416 parse_object(Chars, Kv) ->
417 get_token(Chars, fun(T, C2) ->
418 Obj = obj_new(),
419 case T of
420 rcbrace -> Kv(Obj, C2); % empty object
421 _ -> parse_object(Obj, T, C2, Kv) % token must be string
422 end
423 end).
424
425 parse_object(_Obj, eof, C, _Kv) ->
426 {done, {error, premature_eof}, C};
427
428 parse_object(Obj, S, C, Kv) when is_list(S) -> % S is member name
429 get_token(C, fun
430 (colon, C2) ->
431 parse_object2(Obj, S, C2, Kv);
432 (T, C2) ->
433 {done, {error, {expecting_colon, T}}, C2}
434 end);
435
436 parse_object(_Obj, M, C, _Kv) ->
437 {done, {error, {member_name_not_string, M}}, C}.
438
439 parse_object2(Obj, S, C, Kv) ->
440 get_token(C, fun
441 (eof, C2) ->
442 {done, {error, premature_eof}, C2};
443 (T, C2) ->
444 parse_value(T, C2, fun(V, C3) -> % V is member value
445 Obj2 = obj_store(S, V, Obj),
446 get_token(C3, fun
447 (rcbrace, C4) ->
448 Kv(Obj2, C4); % "}" end of object
449 (comma, C4) -> % "," another member follows
450 get_token(C4, fun(T3, C5) ->
451 parse_object(Obj2, T3, C5, Kv)
452 end);
453 (eof, C4) ->
454 {done, {error, premature_eof}, C4};
455 (T2, C4) ->
456 {done, {error, {expecting_comma_or_curly, T2}}, C4}
457 end)
458 end)
459 end).
460
461 %% Continuation Kv must accept (Value, Chars)
462
463 parse_array(C, Kv) ->
464 get_token(C, fun
465 (eof, C2) -> {done, {error, premature_eof}, C2};
466 (rsbrace, C2) -> Kv({}, C2); % empty array
467 (T, C2) -> parse_array([], T, C2, Kv)
468 end).
469
470 parse_array(E, T, C, Kv) ->
471 parse_value(T, C, fun(V, C2) ->
472 E2 = [V | E],
473 get_token(C2, fun
474 (rsbrace, C3) -> % "]" end of array
475 Kv(list_to_tuple(lists:reverse(E2)), C3);
476 (comma, C3) -> % "," another value follows
477 get_token(C3, fun(T3, C4) ->
478 parse_array(E2, T3, C4, Kv)
479 end);
480 (eof, C3) ->
481 {done, {error, premature_eof}, C3};
482 (T2, C3) ->
483 {done, {error, {expecting_comma_or_close_array, T2}}, C3}
484 end)
485 end).
486
487 %%% OBJECTS
488 %%%
489 %%% We'll use tagged property lists as the internal representation
490 %%% of JSON objects. Unordered lists perform worse than trees for
491 %%% lookup and modification of members, but we expect objects to be
492 %%% have only a few members. Lists also print better.
493
494 %% Is this a proper JSON object representation?
495
496 is_obj({json_object, Props}) when is_list(Props) ->
497 lists:all(fun
498 ({Member, _Value}) when is_atom(Member); is_list(Member) -> true;
499 (_) -> false
500 end, Props);
501
502 is_obj(_) ->
503 false.
504
505 %% Create a new, empty object.
506
507 obj_new() ->
508 {json_object, []}.
509
510 %% Fetch an object member's value, expecting it to be in the object.
511 %% Return value, runtime error if no member found with that name.
512
513 obj_fetch(Key, {json_object, Props}) when is_list(Props) ->
514 case proplists:get_value(Key, Props) of
515 undefined ->
516 exit({json_object_no_key, Key});
517 Value ->
518 Value
519 end.
520
521 %% Fetch an object member's value, or indicate that there is no such member.
522 %% Return {ok, Value} or 'error'.
523
524 obj_find(Key, {json_object, Props}) when is_list(Props) ->
525 case proplists:get_value(Key, Props) of
526 undefined ->
527 error;
528 Value ->
529 {ok, Value}
530 end.
531
532 obj_is_key(Key, {json_object, Props}) ->
533 proplists:is_defined(Key, Props).
534
535 %% Store a new member in an object. Returns a new object.
536
537 obj_store(Key, Value, {json_object, Props}) when is_list(Props) ->
538 {json_object, [{Key, Value} | proplists:delete(Key, Props)]}.
539
540 %% Create an object from a list of Key/Value pairs.
541
542 obj_from_list(Props) ->
543 Obj = {json_object, Props},
544 case is_obj(Obj) of
545 true -> Obj;
546 false -> exit(json_bad_object)
547 end.
548
549 %% Fold Fun across object, with initial accumulator Acc.
550 %% Fun should take (Value, Acc) as arguments and return Acc.
551
552 obj_fold(Fun, Acc, {json_object, Props}) ->
553 lists:foldl(Fun, Acc, Props).
554
555 %%% TESTING
556 %%%
557 %%% We can't expect to round-trip from JSON -> Erlang -> JSON,
558 %%% due to the degrees of freedom in the JSON syntax: whitespace,
559 %%% and ordering of object members. We can, however, expect to
560 %%% round-trip from Erlang -> JSON -> Erlang, so the JSON parsing
561 %%% tests will in fact test the Erlang equivalence of the
562 %%% JSON -> Erlang -> JSON -> Erlang coding chain.
563
564 %% Test driver. Return 'ok' or {failed, Failures}.
565
566 test() ->
567 E2Js = e2j_test_vec(),
568 Failures = lists:foldl(fun({E, J}, Fs) ->
569 case (catch test_e2j(E, J)) of
570 ok ->
571 case (catch round_trip(E)) of
572 ok ->
573 case (catch round_trip_one_char(E)) of
574 ok -> Fs;
575 Reason -> [{round_trip_one_char, E, Reason} | Fs]
576 end;
577 Reason ->
578 [{round_trip, E, Reason} | Fs]
579 end;
580 Reason ->
581 [{erlang_to_json, E, J, Reason} | Fs]
582 end;
583 (end_of_tests, Fs) -> Fs end, [], E2Js),
584 case Failures of
585 [] -> ok;
586 _ -> {failed, Failures}
587 end.
588
589 %% Test for conversion from Erlang to JSON. Note that unequal strings
590 %% may represent equal JSON data, due to discretionary whitespace,
591 %% object member order, trailing zeroes in floating point, etc.
592 %% Legitimate changes to the encoding routines may require tweaks to
593 %% the reference JSON strings in e2j_test_vec().
594
595 test_e2j(E, J) ->
596 J2 = lists:flatten(encode(E)),
597 J = J2, % raises error if unequal
598 ok.
599
600 %% Test that Erlang -> JSON -> Erlang round-trip yields equivalent term.
601
602 round_trip(E) ->
603 J2 = lists:flatten(encode(E)),
604 {ok, E2} = decode_string(J2),
605 true = equiv(E, E2), % raises error if false
606 ok.
607
608 %% Round-trip with one character at a time to test all continuations.
609
610 round_trip_one_char(E) ->
611 J = lists:flatten(encode(E)),
612 {done, {ok, E2}, _} = lists:foldl(fun(C, Ret) ->
613 case Ret of
614 {done, _, _} -> Ret;
615 {more, Cont} -> decode(Cont, [C])
616 end
617 end, {more, first_continuation()}, J ++ [eof]),
618 true = equiv(E, E2), % raises error if false
619 ok.
620
621 %% Test for equivalence of Erlang terms.
622 %% Due to arbitrary order of construction, equivalent objects might
623 %% compare unequal as erlang terms, so we need to carefully recurse
624 %% through aggregates (tuples and objects).
625
626 equiv({json_object, Props1}, {json_object, Props2}) ->
627 equiv_object(Props1, Props2);
628 equiv(T1, T2) when is_tuple(T1), is_tuple(T2) ->
629 equiv_tuple(T1, T2);
630 equiv(N1, N2) when is_number(N1), is_number(N2) -> N1 == N2;
631 equiv(S1, S2) when is_list(S1), is_list(S2) -> S1 == S2;
632 equiv(true, true) -> true;
633 equiv(false, false) -> true;
634 equiv(null, null) -> true.
635
636 %% Object representation and traversal order is unknown.
637 %% Use the sledgehammer and sort property lists.
638
639 equiv_object(Props1, Props2) ->
640 L1 = lists:keysort(1, Props1),
641 L2 = lists:keysort(1, Props2),
642 Pairs = lists:zip(L1, L2),
643 true = lists:all(fun({{K1, V1}, {K2, V2}}) ->
644 equiv(K1, K2) and equiv(V1, V2)
645 end, Pairs).
646
647 %% Recursively compare tuple elements for equivalence.
648
649 equiv_tuple({}, {}) ->
650 true;
651 equiv_tuple(T1, T2) when size(T1) == size(T2) ->
652 S = size(T1),
653 lists:all(fun(I) ->
654 equiv(element(I, T1), element(I, T2))
655 end, lists:seq(1, S)).
656
657 e2j_test_vec() -> [
658 {1, "1"},
659 {3.1416, "3.14160"}, % text representation may truncate, trail zeroes
660 {-1, "-1"},
661 {-3.1416, "-3.14160"},
662 {12.0e10, "1.20000e+11"},
663 {1.234E+10, "1.23400e+10"},
664 {-1.234E-10, "-1.23400e-10"},
665 {"foo", "\"foo\""},
666 {"foo" ++ [500] ++ "bar", [$", $f, $o, $o, 500, $b, $a, $r, $"]},
667 {"foo" ++ [5] ++ "bar", "\"foo\\u0005bar\""},
668 {"", "\"\""},
669 {[], "\"\""},
670 {"\n\n\n", "\"\\n\\n\\n\""},
671 {obj_new(), "{}"},
672 {obj_from_list([{"foo", "bar"}]), "{\"foo\":\"bar\"}"},
673 {obj_from_list([{"foo", "bar"}, {"baz", 123}]),
674 "{\"foo\":\"bar\",\"baz\":123}"},
675 {{}, "[]"},
676 {{{}}, "[[]]"},
677 {{1, "foo"}, "[1,\"foo\"]"},
678
679 % json array in a json object
680 {obj_from_list([{"foo", {123}}]),
681 "{\"foo\":[123]}"},
682
683 % json object in a json object
684 {obj_from_list([{"foo", obj_from_list([{"bar", true}])}]),
685 "{\"foo\":{\"bar\":true}}"},
686
687 % fold evaluation order
688 {obj_from_list([{"foo", {}},
689 {"bar", obj_from_list([{"baz", true}])},
690 {"alice", "bob"}]),
691 "{\"foo\":[],\"bar\":{\"baz\":true},\"alice\":\"bob\"}"},
692
693 % json object in a json array
694 {{-123, "foo", obj_from_list([{"bar", {}}]), null},
695 "[-123,\"foo\",{\"bar\":[]},null]"},
696
697 end_of_tests
698 ].
699
700 %%% TODO:
701 %%%
702 %%% Measure the overhead of the CPS-based parser by writing a conventional
703 %%% scanner-parser that expects all input to be available.
704 %%%
705 %%% JSON has dropped comments - disable their parsing.
706 %%%
707 %%% Allow a compile-time option to decode object member names as atoms,
708 %%% to reduce the internal representation overheads when communicating
709 %%% with trusted peers.