git clone https://github.com/aflin/rampart-langtools.git
mkdir rampart-langtools/build && cd rampart-langtools/build
cmake ..                    ## or for a CUDA build: cmake -DLT_ENABLE_GPU=1 ..
# make and copy modules to the current rampart install dir
make install

// load module
var llamacpp = require('rampart-llamacpp');

// load model downloaded from huggingface
var emb = llamacpp.initEmbed('all-minilm-l6-v2_f16.gguf');

var mytext = "about a paragraph of text follows...";

// create a semantic vector from text:
// also available are embedTextToFp32Buf() and embedTextToNumbers()
var v = emb.embedTextToFp16Buf(mytext);
// v = {vecs: [vec1, vec2, ...], avgVec: avgOfVecs}
// If the passage is not too large for the model, v.vecs.length == 1
// and v.vecs[0] == v.avgVec.
// Otherwise avgVec will be a renormalized average of vecs[].

// store vector and text somewhere
sql.exec("insert into vecs values (?,?,?,?)", [v.avgVec, docId, Title, Text]);

// unload
emb.destroy();

// load module
var llamacpp = require('rampart-llamacpp');

// load model
var rrmodel = process.scriptPath + '/data/models/bge-reranker-v2-m3-Q8_0.gguf';
var rr = llamacpp.initRerank(rrmodel);

// get the score of how well a document/paragraph answers a question:
var score = rr.rerank(question, mydoc);

// options like nctx, n_threads_batch, batch, ubatch can also be set:

// load module
var llamacpp = require('rampart-llamacpp');

// load model
var rrmodel = process.scriptPath + '/data/models/bge-reranker-v2-m3-Q8_0.gguf';
var rr = llamacpp.initRerank(rrmodel, {ubatch: 256});

rampart.globalize(rampart.utils); // for printf, dateFmt and repl

// example: building an index for about 30m vectors from a sql table named vecs:
var faiss = require('rampart-faiss');

// see https://github.com/facebookresearch/faiss/wiki/The-index-factory
// and https://github.com/facebookresearch/faiss/wiki/Guidelines-to-choose-an-index
// It is highly recommended that IDMap or IDMap2 is used to store arbitrary ids
// associated with each vector.
// Otherwise the associated id will be sequentially incremented starting with 0.
var idx = faiss.openFactory("IDMap2,OPQ96,IVF262144,PQ48", 384);

// the name we will eventually use for the saved index
var indname = "all-minilm-vec.OPQ96_IVF262144_PQ48_faiss";

// if compiled for CUDA and available:
idx.enableGpu();
printf("GPU Enabled\n");

// if the index requires training, idx.trainer will be defined.
if (idx.trainer) {
    // make a new trainer, saving training vectors in a new file ./tdata,
    // or reload vectors from ./tdata and skip/continue to insert
    var trainer = new idx.trainer('tdata');
    printf("%3J\n%s\n", trainer, dateFmt('%c %z'));

    // insert vectors into the ./tdata file
    var i = 0;
    sql.exec("select Id, Vec from vecs", {skipRows: 0, maxRows: 10000000},
        function(row) {
            trainer.addTrainingfp16(row.Vec); // or addTrainingfp32()
            i++;
            if (!(i % 1000)) {
                printf("train inserted %d: %.0f\r", i, row.Id);
                fflush(stdout);
            }
        }
    );

    // train from the vectors in ./tdata.
    printf("\n%s\nTraining, go get some coffee, read a book or two, don't touch the keyboard ...\n", dateFmt('%c %z'));
    trainer.train();
    console.log(dateFmt('%c %z'));
}

var cpointf = sprintf("%s-trained", indname);
printf("\n%s: Saving training %s\n", dateFmt('%c %z'), cpointf);
idx.save(cpointf); // This is our trained, but empty index

var res = sql.one("select count(Id) tot from vecs");
var tot = res.tot;

sql.exec("select Id, Vec from vecs", {maxRows: -1}, function(row, i) {
    // add vector using addFp16() or addFp32()
    idx.addFp16(row.Id, row.Vec);
    if (!(i % 10)) {
        printf("inserted %d of %d: %llu\r", i, tot, row.Id);
        // save a checkpoint every 2m inserts in case of interrupt
        if (!
        (i % 2000000)) {
            var cpointf = sprintf("%s-%d", indname, i);
            printf("\n%s: Saving checkpoint %s\n", dateFmt('%c %z'), cpointf);
            idx.save(cpointf);
        }
    }
    i++;
});

// done inserting, save with filename
idx.save(indname);

// test it out:
var llamacpp = require('rampart-llamacpp');
var emb = llamacpp.initEmbed('all-minilm-l6-v2_f16.gguf');

printf("\nSemantic Vector Search Test\nEnter Query:\n");
var rl = repl("Query: ");

while ((l = rl.next())) {
    var v = emb.embedTextToFp16Buf(l);
    var res = idx.searchFp16(v.avgVec, /*nres = */10, /* nprobe = */128);
    printf("\nRESULTS:\n");

    var ids = [];
    var idtoscore = {};
    res.forEach(function(r) {
        ids.push(r.id);
        idtoscore[r.id] = r.distance;
    });

    // get results from sql table, reorder by actual cosine similarity, print
    sql.exec(
        "select vecdist(Vec, ?, 'dot', 'f16') Dist, Id, Title, Text from vecs where Id in (?) order by 1 DESC",
        [v.avgVec, ids],
        function(sres, i) {
            printf("%as: %as, (%.2f : %.2f)\n%.80s\n",
                "green", i, "green", sres.Title, idtoscore[sres.Id], sres.Dist, sres.Text);
        }
    );
    rl.refresh();
}

var faiss = require('rampart-faiss');
var indname = "all-minilm-vec.OPQ96_IVF262144_PQ48_faiss";

// load index from file into ram
var idx = faiss.openIndexFromFile(indname);
// or open read only with memmap to serve from disk:
var idx = faiss.openIndexFromFile(indname, true);

// use just like in the example above.
var llamacpp = require('rampart-llamacpp');
var emb = llamacpp.initEmbed('all-minilm-l6-v2_f16.gguf');
var v = emb.embedTextToFp16Buf(myquery);
var res = idx.searchFp16(v.avgVec, /*nres = */10, /* nprobe = */128);
// res is an array of Ids inserted into the index

var sp = require('rampart-sentencepiece');

// model from https://huggingface.co/BAAI/bge-m3/blob/main/sentencepiece.bpe.model
var encoder = sp.init('./sentencepiece.bpe.model');

var encoded = encoder.encode('hello there you goat');
// encoded = ["▁hell","o","▁there","▁you","▁go","at"]

var decoded = encoder.decode(encoded);
// = "hello there you goat"

All the modules packaged into one.
var langtools = require('rampart-langtools');
var faiss = langtools.faiss;
var llamacpp = langtools.llamacpp;
var sp = langtools.sentencepiece;

- libgfortran.so.5
- libomp.so.5
- cuda libraries for gpu build on linux