From e1423a70d5ea04d6714c8471ce65228c6afa6951 Mon Sep 17 00:00:00 2001 From: Gustavo Cordova Avila Date: Wed, 9 Nov 2022 14:37:15 -0800 Subject: [PATCH] Initial commit --- .gitignore | 2 + README.md | 45 +++++++ _envrc | 4 + demo/data/fst/.delete.me | 1 + demo/data/kv/.delete.me | 1 + demo/docker-compose.yml | 18 +++ demo/sonic.cfg | 51 ++++++++ sc.nimble | 12 ++ src/nim.cfg | 0 src/sc.nim | 245 +++++++++++++++++++++++++++++++++++++++ static/usage.txt | 66 +++++++++++ 11 files changed, 445 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 _envrc create mode 100644 demo/data/fst/.delete.me create mode 100644 demo/data/kv/.delete.me create mode 100644 demo/docker-compose.yml create mode 100644 demo/sonic.cfg create mode 100644 sc.nimble create mode 100644 src/nim.cfg create mode 100644 src/sc.nim create mode 100644 static/usage.txt diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7d39a53 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/dist +.envrc diff --git a/README.md b/README.md new file mode 100644 index 0000000..bea3ac0 --- /dev/null +++ b/README.md @@ -0,0 +1,45 @@ +# SonicClient +This is a rudimentary [Sonic](https://github.com/valeriansaliou/sonic) +command-line client that I'm using to interact with a locally running +service. + +## Server +You'll need a server running; to spin one up you can +use the `docker-compose.yml` file in the `demo` +directory, which pulls down the docker image from +the docker hub. + +The required directory structure (`data/kv` and `data/fst`) +is already in place, so to spin it up you only need to: + +```sh +$ docker-compose up -d; docker-compose logs -f +``` + +## Client +To build the client you need a [Nim](https://nim-lang.org/) compiler +for your target architecture; once it's available, along with the +`nimble` tool, you can build the client with: +```sh +$ nimble build --verbose -d:release +``` +and the binary will be left in the `./dist` directory.
+ +## Usage +Once the client is built, run: +``` +$ ./dist/sc --help +``` +to display the commands and options required. + +## Environment variables +The file `_envrc` contains an `.envrc` template for your +convenience; the client needs these three environment variables +set up so it knows what server to interact with: + + * `SONIC_HOST` + * `SONIC_PORT` + * `SONIC_SECRET` + +You may use any method to set them up, and the `.envrc` method +is just a convenience for those that use `direnv`. diff --git a/_envrc b/_envrc new file mode 100644 index 0000000..a6a0b34 --- /dev/null +++ b/_envrc @@ -0,0 +1,4 @@ +#!/bin/bash +export SONIC_HOST=127.0.0.1 # or some other ip address or hostname +export SONIC_PORT=1491 # or some other port? +export SONIC_SECRET="PthRtZ0m2MFLG" # or whatever password you set up diff --git a/demo/data/fst/.delete.me b/demo/data/fst/.delete.me new file mode 100644 index 0000000..c0b74f5 --- /dev/null +++ b/demo/data/fst/.delete.me @@ -0,0 +1 @@ +index data directory diff --git a/demo/data/kv/.delete.me b/demo/data/kv/.delete.me new file mode 100644 index 0000000..c0b74f5 --- /dev/null +++ b/demo/data/kv/.delete.me @@ -0,0 +1 @@ +index data directory diff --git a/demo/docker-compose.yml b/demo/docker-compose.yml new file mode 100644 index 0000000..222f23f --- /dev/null +++ b/demo/docker-compose.yml @@ -0,0 +1,18 @@ +--- +version: '3.8' + +services: + sonic: + image: valeriansaliou/sonic:v1.4.0 + volumes: + - type: bind + source: ./sonic.cfg + target: /etc/sonic.cfg + - type: bind + source: ./data + target: /data + ports: + - 1491:1491 + restart: unless-stopped + +# vim: ai:et:ts=2:sw=2:wm=0: diff --git a/demo/sonic.cfg b/demo/sonic.cfg new file mode 100644 index 0000000..f7ce5f6 --- /dev/null +++ b/demo/sonic.cfg @@ -0,0 +1,51 @@ +# Sonic +# Fast, lightweight and schema-less search backend +# Configuration file +# Example: https://github.com/valeriansaliou/sonic/blob/master/config.cfg + +[server] +log_level = "info" + +[channel] +inet = 
"0.0.0.0:1491" +tcp_timeout = 300 +auth_password = "PthRtZ0m2MFLG" + +[channel.search] +query_limit_default = 10 +query_limit_maximum = 100 +query_alternates_try = 4 +suggest_limit_default = 5 +suggest_limit_maximum = 20 +list_limit_default = 100 +list_limit_maximum = 500 + +[store] + +[store.kv] +path = "/data/kv/" +retain_word_objects = 1000 + +[store.kv.pool] +inactive_after = 1800 + +[store.kv.database] +flush_after = 900 +compress = true +parallelism = 2 +max_files = 100 +max_compactions = 1 +max_flushes = 1 +write_buffer = 16384 +write_ahead_log = true + +[store.fst] +path = "/data/fst/" + +[store.fst.pool] +inactive_after = 300 + +[store.fst.graph] +consolidate_after = 180 +max_size = 2048 +max_words = 250000 diff --git a/sc.nimble b/sc.nimble new file mode 100644 index 0000000..cffc50e --- /dev/null +++ b/sc.nimble @@ -0,0 +1,12 @@ +# Package +version = "0.1.0" +author = "Gustavo Cordova Avila" +description = "Sonic search infra client" +license = "Apache-2.0" +srcDir = "src" +binDir = "dist" +bin = @["sc"] + +# Dependencies +requires "nim >= 1.6.8" +requires "sonic >= 0.1.0" diff --git a/src/nim.cfg b/src/nim.cfg new file mode 100644 index 0000000..e69de29 diff --git a/src/sc.nim b/src/sc.nim new file mode 100644 index 0000000..b9390be --- /dev/null +++ b/src/sc.nim @@ -0,0 +1,245 @@ +## Ingest a file's content into Sonic +import os +import strutils +import sonic + +var + verbose: bool = false + +const + USAGE_DOC = staticRead("../static/usage.txt") + +proc showUsage(msg = "") = + ## Display a usage message and quit + if msg != "": + stderr.writeLine "ERROR: $#\n" % msg + quit USAGE_DOC.replace("${app}", getAppFilename()), + (if msg == "": 0 else: 10) + +proc envParam(name: string; default = ""): string = + ## Return an environment parameter or return an error + let eVal = getEnv(name, default) + if eVal == "": + quit("Expected a value: $" & name, 1) + return eVal + +proc getChannel(mode = SonicChannel.Ingest): Sonic = + ## Return a channel based on 
command line params + let + host = envParam("SONIC_HOST") + port = envParam("SONIC_PORT") + secret = envParam("SONIC_SECRET") + try: + return open(host, port.parseInt(), secret, mode) + except ValueError: + let err = getCurrentExceptionMsg() + quit "Not a valid number: $#\n$#" % [port, err], 1 + +proc consolidate(channel: Sonic) = + ## Trigger a consolidation + discard channel.execCommand("TRIGGER", @["consolidate"]) + +proc close(chn: Sonic) = + ## Close a sonic channel + let outp = chn.quit() + if verbose: + stderr.writeLine "" % [$(chn.channel), outp] + +proc intAt(args: openArray[string]; pos: int; default: int): int = + ## Parse a positional parameter if it exists, if not use default + if args.len-1 < pos or args[pos] == "": + return default + try: + return args[pos].parseInt() + except: + let err = getCurrentExceptionMsg() + quit("Not a valid number: $#\n$#" % [args[pos], err], 1) + +################################################################ +## Execute the user-facing commands +## +proc cmdPing() = + ## Execute the "ping" command + let + chn = getChannel(SonicChannel.Control) + response = chn.execCommand("PING") + chn.close() + quit response, if response == "PONG": 0 else: 1 + +proc cmdCount(collection, bucket, objId: string) = + ## Return indexed search data count for collection/bucket/objId + let + chn = getChannel(SonicChannel.Search) + response = chn.count(collection, bucket, objId) + chn.close() + quit $response, 0 + +proc cmdPush(collection, bucket, obj, data: string) = + ## Ingest a file's content into a Sonic instance + var + justOne = false + stream = stdin + + if data.len == 0: + quit "Data is an empty string", 1 + elif data == "-": + stderr.writeLine "Reading from " + elif data[0] == '@': + let filename = data[1 ..< data.len] + try: + stream = open(filename, bufSize=8000) + except: + let err = getCurrentExceptionMsg() + quit "Can't open \"$#\":\n$#" % [filename, err], 10 + else: + justOne = true + + var + ingCh = 
getChannel(SonicChannel.Ingest) + ctlCh = getChannel(SonicChannel.Control) + rMsg = "" + rCode = 0 + + if justOne: + let pushedOk = ingCh.push(collection, bucket, obj, data) + ctlCh.consolidate() + rMsg = if pushedOk: "" else: "push command returned a warning" + rCode = if pushedOk: 0 else: 1 + else: + let + objIdPrefix = if data[0] == '@': "$#/$#:" % [obj, data[1 ..< data.len]] + else: obj & ":" + var + line = newStringOfCap(256) + count = 0 + stderr.write("push: ") + while stream.readLine(line): + let + objId = objIdPrefix & $count + pushed = ingCh.push(collection, bucket, objId, line) + stderr.write(if pushed: "." else: "x") + inc count + if (count mod 31) == 0: + ctlCh.consolidate() + stderr.write("#") + stderr.write("\n") + if data[0] == '@': + close(stream) + ctlCh.close() + ingCh.close() + quit rMsg, rCode + +proc cmdPop(collection, bucket, obj, data: string) = + ## Pop search data from the given collection/bucket/obj + if data.len > 0: + let + ingCh = getChannel(SonicChannel.Ingest) + ctlCh = getChannel(SonicChannel.Control) + popOut = ingCh.pop(collection, bucket, obj, data) + ctlCh.consolidate() + ctlCh.close() + ingCh.close() + quit $popOut, 0 + quit "Data is an empty string", 1 + +proc cmdQuery(collection, bucket, terms: string; limit, offset: int) = + ## Query the indexes, echo the results to stdout + let + srChn = getChannel(SonicChannel.Search) + results = srChn.query(collection, bucket, terms, limit, offset) + srChn.close() + quit(results.join("\n"), 0) + +proc cmdSuggest(collection, bucket, word: string; limit: int) = + ## Query suggestions based on the word + let + srChn = getChannel(SonicChannel.Search) + results = srChn.suggest(collection, bucket, word, limit) + srChn.close() + quit(results.join("\n"), 0) + +proc cmdFlush(collection: string; bucket=""; objId="") = + ## Flushes all indexed data for the given collection, bucket or object + let + cnChn = getChannel(SonicChannel.Control) + results = cnChn.flush(collection, bucket, objId) + 
cnChn.close() + quit($results, 0) + +################################################################ +## Parse the command line and dispatch appropriate actions +## +proc main() = + ## Parse the command line, dispatch the appropriate actions + var args = commandLineParams() + + if args.len == 0 or "-h" in args or "--help" in args: + showUsage() + + verbose = ("-v" in args) or ("--verbose" in args) + if verbose: + while "-v" in args: + args.delete(args.find("-v")) + while "--verbose" in args: + args.delete(args.find("--verbose")) + + case args[0]: + of "help": + showUsage() + + of "ping": # ping + if args.len > 1: + showUsage("'ping' takes no arguments") + cmdPing() + + of "count": # count [bucket [object]] + let + bucket = (if args.len >= 3: args[2] else: "") + objId = (if args.len >= 4: args[3] else: "") + if args.len > 4: + showUsage("Too many arguments for 'count'") + cmdCount(args[1], bucket, objId) + + of "push": # push "data|@filename|-" + if args.len != 5: + let pre = if args.len < 5: "Missing" else: "Too many" + showUsage(pre & " arguments for 'push'") + cmdPush(args[1], args[2], args[3], args[4]) + + of "pop": # pop "data" + if args.len != 5: + let pre = if args.len < 5: "Missing" else: "Too many" + showUsage(pre & " arguments for 'pop'") + cmdPop(args[1], args[2], args[3], args[4]) + + of "query": # query "terms" [limit=10] [offset=0] + if args.len < 4 or args.len > 6: + let pre = if args.len < 4: "Missing" else: "Too many" + showUsage(pre & " arguments for 'query'") + let + limit = args.intAt(4, 10) + offset = args.intAt(5, 0) + cmdQuery(args[1], args[2], args[3], limit, offset) + + of "suggest": # query "word" [limit=10] + if args.len < 4 or args.len > 5: + let pre = if args.len < 4: "Missing" else: "Too many" + showUsage(pre & " arguments for 'suggest'") + let + limit = args.intAt(4, 10) + cmdSuggest(args[1], args[2], args[3], limit) + + of "flush": # flush [bucket [object]] + if args.len < 3 or args.len > 5: + let pre = if args.len < 3: "Missing" 
else: "Too many" + showUsage(pre & " arguments for 'flush'") + let + bucket = (if args.len >= 3: args[3] else: "") + objId = (if args.len >= 4: args[4] else: "") + cmdFlush(args[2], bucket, objId) + + else: + showUsage("Unknown command: " & args[0]) + +main() +# Fin diff --git a/static/usage.txt b/static/usage.txt new file mode 100644 index 0000000..9d72029 --- /dev/null +++ b/static/usage.txt @@ -0,0 +1,66 @@ +Usage: +====== + ${app} ping + ${app} push "data|@filename|-" + ${app} pop "search data to pop" + ${app} query "search terms" [limit] [offset] + ${app} suggest "word" [limit] + ${app} flush [ bucket [object] ] + +Commands: +========= + ping - Verify server is connected, returns "PONG" + + count - Return the count of indexed data, requires: + + collection + + bucket; optional + + object; optional + + push - Ingest search data into the server, requires: + + collection + + bucket + + object + + "quoted text on the command line" + -or- + "@filename.ext" where to read the text + -or- + "-" to read from stdin + + pop - Pop data from the search indexes, requires: + + collection + + bucket + + object + + "quoted text on the command line" + + query - Query the indexes for information, requires: + + collection + + bucket + + "search terms" + + limit, optional; default: 10 + + offset, optional; default: 0 + + Outputs the object IDs that match the given search terms + + suggest - Request suggestions for a given word, requires: + + collection + + bucket + + "word" + + limit, optional; default: 10 + + Outputs the suggestions to stdout + + flush - Flush all index data for the given collection, bucket or object + + collection + + bucket; optional + + object; optional + + +Environment variables: +====================== +These variables point to the Sonic service the application +will be connecting to: + + + SONIC_HOST: Hostname or ip address of the service + + SONIC_PORT: Port that the service is listening on + + SONIC_SECRET: Password required to connect to sonic +