vfs-index is a simple indexer for simple data collections (JSON, CSV, msgpack, ...) on the OS's virtual file system. This indexer requires no daemon process and no locking.
- cmd/vfs-index: indexing/merging/searching in terminal.
When the data is big (100 million rows or so), searching it with jq is very slow.
vfs-index indexes the files of such a big data collection (CSV, JSON, ...). vfs-index can search without locking while indexing is in progress. If indexing is stopped by Ctrl-C, it continues the next time the index command is run.
$ cat test.json
[
{
"id": 130988433,
"name": "2011_04_24-1.m4v"
},
{
"id": 130988434,
"name": "2011_04_24-2.mp4"
},
{
"id": 130988435,
"name": "2011_04_24-3.mp4"
},
vfs-index detects the data format by file extension, and supports lz4 compression.
.json
.csv
.json.lz4
.csv.lz4
The JSONL format is also supported in .json files.
func DefaultOption() vfs.Option {
return vfs.RootDir("/Users/xtakei/vfs-idx")
}
idx, e := vfs.Open("/Users/xtakei/example/data", DefaultOption())
e = idx.Regist("test", "id")
searching
func DefaultOption() vfs.Option {
return vfs.RootDir("/Users/xtakei/vfs-idx")
}
// search number index
idx, e := vfs.Open("/Users/xtakei/example/data", DefaultOption())
sCond := idx.On("test", vfs.ReaderColumn("id"), vfs.Output(vfs.MapInfOutput))
record := sCond.Select(func(m vfs.SearchCondElem2) bool {
return m.Op("id", "<", 122878513)
}).First()
search by a numeric attribute/field
// search by matching substring
sCondName := idx.On("test", vfs.ReaderColumn("name"), vfs.Output(vfs.MapInfOutput))
matches2 := sCondName.Match("ロシア人").All()
Merging stops after 1 minute.
idx, e := vfs.Open("/Users/xtakei/example/data", DefaultOption())
sCond := idx.On("test",
vfs.ReaderColumn("id"),
vfs.Output(vfs.MapInfOutput),
vfs.MergeDuration(time.Second*time.Duration(60)))
sCond.StartMerging()
sCond.CancelAndWait()
If the attribute/field is a number, records can be selected by arithmetic comparison. Supported comparison operators: == >= < <= > .
idx, e := vfs.Open("/Users/xtakei/example/data", DefaultOption())
sCond := idx.On("test", vfs.Output(vfs.MapInfOutput))
results := sCond.Query(`title.search("鬼滅の") && id == 3365460`).All()
idx, e := vfs.Open("/Users/xtakei/example/data", DefaultOption())
sCond := idx.On("test", vfs.ReaderColumn("title"), vfs.Output(vfs.MapInfOutput))
matches := sCond.Match("鬼滅").All()
idx, e := vfs.Open("/Users/xtakei/example/data", DefaultOption())
sCond := idx.On("test", vfs.ReaderColumn("title"), vfs.Output(vfs.MapInfOutput))
matches := sCond.Select(func(cond vfs.SearchElem) bool {
return cond.Op("title", "==", "警視庁")
}).All()
JSON output
jsonstr := sCond.Select(func(cond vfs.SearchElem) bool {
return cond.Op("title", "==", "警視庁")
}).All(vfs.ResultOutput("json"))
CSV output
csvstr := sCond.Select(func(cond vfs.SearchElem) bool {
return cond.Op("title", "==", "警視庁")
}).All(vfs.ResultOutput("csv"))
- write file list
- read file list
- write number column index
- read number column index
- write tri-gram column index
- support string search
- support index merging
- add comment
- csv support
- msgpack support
- support compression data ( lz4(?) )
- binary search mergedindex file
- improve performance of searching with comparison operators (excluding equality) using a stream
- improve performance to search in merged index
- improve performance to search in non-merged index