-
Notifications
You must be signed in to change notification settings - Fork 17
/
makeschema.js
1 lines (1 loc) · 6.71 KB
/
makeschema.js
1
var sample = 100;

/**
 * Infers a probabilistic schema description from a sequence of documents.
 *
 * Every field of every document is visited; for each field the function
 * records how often it occurred (`#count`) and which types it had (`#type`).
 * With `options.data` enabled it also records min/max for numbers and dates
 * and value histograms for strings (`#data`).
 *
 * @param {Object} documents  Anything exposing `forEach(callback)` that yields
 *   documents (an array, or a cursor as passed by DBCollection.prototype.schema).
 * @param {Object} [options]  Mutated in place and stored in the result under
 *   `__schema.options`.
 * @param {boolean} [options.raw=false]   Return `{raw_schema, cleanup}` instead
 *   of the cleaned-up schema.
 * @param {boolean} [options.flat=true]   Flatten nested fields to dotted keys.
 * @param {boolean|Object} [options.data=false]  Collect value statistics;
 *   `options.data.maxCardinality` (default 100) caps distinct string values.
 * @param {Object} [options.merge]        A previously returned schema to merge
 *   the new documents into. NOTE: the passed object is modified.
 * @param {Object} [options.metavars]     Overrides for the metavariable prefix
 *   and names (prefix, count, type, data, array, other).
 * @param {string} [dbname]    Database name used to look up index and
 *   collection statistics through the global `db`.
 * @param {string} [collname]  Collection name used for the same lookup.
 * @returns {Object} The schema; in raw mode `{raw_schema, cleanup}`.
 * @throws {Error} If `options.merge` carries an incompatible schema version.
 */
function schema(documents, options, dbname, collname) {
    var SCHEMA_VERSION = "0.7.1";

    // Metavariable keys; assigned further down once options are resolved.
    var $c, $t, $d, $a, $o, $p;

    /**
     * Splits `str` on `sep`, but only at the last `maxsplit` occurrences
     * (right split); everything before is re-joined into the first element.
     */
    function _rsplit(str, sep, maxsplit) {
        var split = str.split(sep || /\s+/);
        return maxsplit
            ? [split.slice(0, -maxsplit).join(sep)].concat(split.slice(-maxsplit))
            : split;
    }

    /**
     * Flattens a nested schema so that each field lives under one dotted
     * top-level key ("a.b.c") whose value holds that field's metavariables.
     */
    var _flatten = function (obj) {
        function recursive(node) {
            var result = {};
            for (var o in node) {
                if (!node.hasOwnProperty(o)) continue;
                // Descend into sub-documents, but never into the type/data
                // metavariables and not below array fields. "coordinates" is
                // always descended into. Uses $a rather than a literal
                // "#array" so a custom metavars prefix is honoured.
                var descend = ((typeof node[o]) === 'object' &&
                               !node[$a] &&
                               [$t, $d].indexOf(o) === -1) ||
                              o == "coordinates";
                if (descend) {
                    var flatObject = recursive(node[o]);
                    for (var x in flatObject) {
                        if (!flatObject.hasOwnProperty(x)) continue;
                        result[o + '.' + x] = flatObject[x];
                    }
                } else {
                    result[o] = node[o];
                }
            }
            return result;
        }

        var flatobj = recursive(obj);
        var finalobj = {};
        for (var f in flatobj) {
            if (!flatobj.hasOwnProperty(f)) continue;
            if (f.indexOf('.') !== -1) {
                // "a.b.#count" -> finalobj["a.b"]["#count"]
                var split = _rsplit(f, '.', 1);
                if (!(split[0] in finalobj)) {
                    finalobj[split[0]] = {};
                }
                finalobj[split[0]][split[1]] = flatobj[f];
            } else {
                finalobj[f] = flatobj[f];
            }
        }
        return finalobj;
    };

    /**
     * Returns the schema type name for values that are reported as a leaf
     * type without descending into them, or undefined for everything else.
     * Shell-only constructors are guarded with typeof so the function also
     * works where they are not defined.
     */
    function _specialType(obj) {
        if (typeof ObjectId !== 'undefined' && obj instanceof ObjectId) return 'objectid';
        if ((typeof NumberLong !== 'undefined' && obj instanceof NumberLong) ||
            (typeof NumberInt !== 'undefined' && obj instanceof NumberInt)) return 'number';
        if (obj instanceof Date) return 'date';
        if (obj === null) return 'null';
        return undefined;
    }

    /**
     * Records one observation of `obj` in `schema` (count and type) and
     * recurses into its fields when it is a plain object. Returns `schema`.
     */
    function _infer(schema, obj) {
        schema[$c] = ($c in schema) ? schema[$c] + 1 : 1;
        if (!($t in schema)) {
            schema[$t] = {};
        }

        var special = _specialType(obj);
        var type = special || typeof obj;
        schema[$t][type] = (type in schema[$t]) ? schema[$t][type] + 1 : 1;

        if (special || !obj || typeof obj != 'object') {
            return schema;
        }

        Object.keys(obj).forEach(function (key) {
            var val = obj[key];
            // Empty strings are recorded as null. Strict comparison: the
            // loose `val == ""` also matched 0, false and [], which were
            // then mis-typed as null.
            if (val === "") val = null;

            if (!(key in schema)) {
                schema[key] = {};
            }

            if (val instanceof Array) {
                // Each element is inferred as if it were the field's value.
                val.forEach(function (el) {
                    var doc = {};
                    doc[key] = el;
                    _infer(schema, doc);
                });
                // The per-element _infer calls each bumped the parent count;
                // undo that so the parent is still counted once per document.
                schema[$c] -= val.length;
                schema[key][$a] = true;
                return;
            } else {
                _infer(schema[key], val);
            }

            // Value statistics are only kept while the field has one type.
            if (options.data && (Object.keys(schema[key][$t]).length === 1)) {
                if (!($d in schema[key])) {
                    schema[key][$d] = {};
                }
                var d = schema[key][$d];
                switch (typeof val) {
                    case 'number':
                        if (!('min' in d)) d['min'] = Infinity;
                        if (!('max' in d)) d['max'] = -Infinity;
                        d['min'] = (val < d['min']) ? val : d['min'];
                        d['max'] = (val > d['max']) ? val : d['max'];
                        break;
                    case 'string':
                        if (val in d) {
                            d[val]++;
                        } else if (Object.keys(d).length < options.data.maxCardinality) {
                            d[val] = 1;
                        } else {
                            // Cardinality cap reached: lump into "other".
                            d[$o] = $o in d ? d[$o] + 1 : 1;
                        }
                        break;
                    case 'object':
                        if (val instanceof Date) {
                            // Seed with the largest / smallest representable dates.
                            if (!('min' in d)) d['min'] = new Date(100000000 * 86400000);
                            if (!('max' in d)) d['max'] = new Date(-100000000 * 86400000);
                            d['min'] = (val.getTime() < d['min'].getTime()) ? val : d['min'];
                            d['max'] = (val.getTime() > d['max'].getTime()) ? val : d['max'];
                        }
                        break;
                }
            }
        });
        return schema;
    }

    /**
     * Turns the raw counters into the public form: a single-type `#type`
     * object collapses to its type name, `#data` is dropped where it is not
     * meaningful, and strings are classified as 'text' or 'category'.
     */
    function _cleanup(schema, count) {
        if (typeof schema !== 'object') {
            return schema;
        }
        if (schema[$t] !== undefined) {
            var type_keys = Object.keys(schema[$t]);
            if (type_keys.length === 1) {
                schema[$t] = type_keys[0];
            }
        }
        if (schema[$c] !== undefined) {
            count = schema[$c];
        }
        if (schema[$d] !== undefined) {
            if (!($t in schema)) {
                delete schema[$d];
            }
            if (typeof schema[$t] === 'object') {
                delete schema[$d];
            }
            if (schema[$t] === 'boolean') {
                delete schema[$d];
            }
            if (schema[$t] === 'null') {
                delete schema[$d];
            }
            if (schema[$t] === 'string') {
                var values = Object.keys(schema[$d]).map(function (key) {
                    return schema[$d][key];
                });
                var maxCount = Math.max.apply(null, values);
                // All values unique -> free text; otherwise a category.
                if (maxCount === 1 && values.length > 1) {
                    schema[$t] = 'text';
                    delete schema[$d];
                } else {
                    schema[$t] = 'category';
                }
            }
        }
        Object.keys(schema).forEach(function (key) {
            if (key === '__schema') return;
            _cleanup(schema[key], count);
        });
        return schema;
    }

    /**
     * Reverses _cleanup on a previously returned schema so new documents
     * can be merged into it: type names become `{type: count}` objects again
     * and 'text'/'category' are folded back into 'string'.
     */
    function _uncleanup(schema) {
        if (typeof schema !== 'object') {
            return schema;
        }
        if (schema[$t] !== undefined) {
            if (typeof schema[$t] !== 'object') {
                var obj = {};
                obj[schema[$t]] = schema[$c];
                schema[$t] = obj;
            }
        }
        if (schema[$t] !== undefined) {
            var string_sum = (schema[$t].text || 0) + (schema[$t].category || 0);
            if (string_sum > 0) {
                if ('text' in schema[$t]) delete schema[$t].text;
                if ('category' in schema[$t]) delete schema[$t].category;
                schema[$t].string = string_sum;
            }
        }
        if (schema[$p] !== undefined) {
            delete schema[$p];
        }
        Object.keys(schema).forEach(function (key) {
            if (key === '__schema') return;
            _uncleanup(schema[key]);
        });
        return schema;
    }

    /** Copies the own properties of `obj` over `defaults` and returns `defaults`. */
    function _mergeDefaults(defaults, obj) {
        for (var key in obj) {
            if (!obj.hasOwnProperty(key)) {
                continue;
            }
            defaults[key] = obj[key];
        }
        return defaults;
    }

    /** Returns the first own enumerable property name of `o`. */
    function firstKeyName(o) {
        for (var propName in o) {
            if (o.hasOwnProperty(propName)) {
                return propName;
            }
        }
    }

    // ---- resolve options -------------------------------------------------
    options = options || {};
    options.raw = options.raw || false;
    options.flat = options.flat === false ? false : true;
    options.data = options.data || false;
    options.merge = options.merge || false;
    options.metavars = _mergeDefaults({
        prefix: '#',
        count: 'count',
        type: 'type',
        data: 'data',
        array: 'array',
        other: 'other'
    }, options.metavars);

    $c = options.metavars.prefix + options.metavars.count;
    $t = options.metavars.prefix + options.metavars.type;
    $d = options.metavars.prefix + options.metavars.data;
    $a = options.metavars.prefix + options.metavars.array;
    $o = options.metavars.prefix + options.metavars.other;
    // NOTE(review): $p was read by _uncleanup but never declared, so every
    // non-raw merge threw a ReferenceError. Presumably it is a "prob"
    // metavariable emitted by earlier schema versions - confirm.
    $p = options.metavars.prefix + (options.metavars.prob || 'prob');

    if (options.data) {
        if (typeof options.data !== 'object') {
            options.data = {};
        }
        options.data.maxCardinality = options.data.maxCardinality || 100;
    }

    // ---- starting point: empty schema or the one to merge into -----------
    var root;
    if (options.raw) {
        root = options.merge.raw_schema || {};
    } else {
        root = options.merge ? _uncleanup(options.merge) : {};
    }

    if (root['__schema'] !== undefined) {
        var sver = root['__schema'].version.split('.');
        var myver = SCHEMA_VERSION.split('.');
        // Major versions must match; while the major version is 0 the minor
        // must match too. The parts are strings, so compare against '0'
        // (the former `=== 0` was never true).
        if ((sver[0] != myver[0]) || (sver[0] === '0' && (sver[1] != myver[1]))) {
            throw new Error('cannot merge schema,version incompatible');
        }
        // Drop the old header so it is not flattened into the result as
        // bogus "__schema.*" fields; a fresh one is written below.
        delete root['__schema'];
    }

    // ---- inference -------------------------------------------------------
    documents.forEach(function (doc) {
        root = _infer(root, doc);
    });

    if (!options.raw) {
        root = _cleanup(root);
        delete root[$t];
    }
    if (options.flat) {
        root = _flatten(root);
    }
    if (options.merge) {
        // Do not embed the merged-in schema inside the stored options.
        options.merge = true;
    }
    root['__schema'] = { version: SCHEMA_VERSION, options: options };

    if (options.raw) {
        return {
            raw_schema: root,
            cleanup: function () {
                return _cleanup(root);
            }
        };
    }

    // ---- collection metadata (needs the shell's global `db`) -------------
    if (typeof db !== 'undefined' && dbname && collname) {
        var coll = db.getSiblingDB(dbname).getCollection(collname);

        coll.getIndexes().forEach(function (i) {
            var field = root[firstKeyName(i.key)];
            // The indexed field may be absent from the sampled documents.
            if (!field || typeof field !== 'object') return;
            if (i.name.endsWith("2d")) field["#index2d"] = true;
            if (i.name.endsWith("2dsphere")) field["#index2dsphere"] = true;
        });

        var cstats = coll.stats();
        root["__schema"]["#totalcount"] = cstats["count"];
        // These two were previously assigned crosswise.
        root["__schema"]["#avgObjSize"] = cstats["avgObjSize"];
        root["__schema"]["#size"] = cstats["size"];
        root["__schema"]["#storageSize"] = cstats["storageSize"];
    }

    return root;
}

// When DBCollection is defined, expose the analysis as a collection method,
// i.e. collection.schema(options).
if (typeof DBCollection !== 'undefined') {
    /**
     * Infers the schema of this collection from up to `options.samples`
     * documents (default 100; 'all' is translated to a find() limit of 0).
     */
    DBCollection.prototype.schema = function (options) {
        var options = options || {};
        options.samples = options.samples || 100;
        if (options.samples === 'all') {
            options.samples = 0;
        }
        var cursor = this.find({}, null, options.samples, 0, 0);
        return schema(cursor, options, this._db.getName(), this._shortName);
    };
}