Easy to use
Zero-config by default. Works in Node.js, Bun, and Electron. Bootstrap a new project with a single command.
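For example, assuming the scaffolding command documented for recent versions:

```bash
npm create node-llama-cpp@latest
```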
Node.js bindings for llama.cpp, and much more
gpt-oss is here!
Experience the ease of running models on your machine
```bash
npx -y node-llama-cpp chat
```

To chat with models using a UI, try the example Electron app.
Check out your hardware capabilities
```bash
npx -y node-llama-cpp inspect gpu
```
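The same check can be done from code; a minimal sketch, assuming the `gpu` property exposed by the object returned from `getLlama()`:

```typescript
import {getLlama} from "node-llama-cpp";

const llama = await getLlama();

// the detected GPU backend,
// e.g. "metal", "cuda" or "vulkan", or `false` when running on CPU only
console.log("GPU type:", llama.gpu);
```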
Everything you need to use large language models in your project

Integrate node-llama-cpp in your codebase and prompt models
```typescript
import {fileURLToPath} from "url";
import path from "path";
import {getLlama, LlamaChatSession} from "node-llama-cpp";

const __dirname = path.dirname(
    fileURLToPath(import.meta.url)
);

// load a local GGUF model and create a chat session on top of it
const llama = await getLlama();
const model = await llama.loadModel({
    modelPath: path.join(__dirname, "my-model.gguf")
});
const context = await model.createContext();
const session = new LlamaChatSession({
    contextSequence: context.getSequence()
});

const q1 = "Hi there, how are you?";
console.log("User: " + q1);

const a1 = await session.prompt(q1);
console.log("AI: " + a1);
```
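Responses can also be streamed while they are generated; a minimal sketch, assuming the `onTextChunk` prompt option and continuing the `session` from the example above:

```typescript
const q2 = "Tell me a joke";
console.log("User: " + q2);

// print each chunk of the answer as it is generated,
// instead of waiting for the full response
process.stdout.write("AI: ");
await session.prompt(q2, {
    onTextChunk(chunk) {
        process.stdout.write(chunk);
    }
});
process.stdout.write("\n");
```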
Get an embedding for a given text

```typescript
import {fileURLToPath} from "url";
import path from "path";
import {getLlama} from "node-llama-cpp";

const __dirname = path.dirname(
    fileURLToPath(import.meta.url)
);

const llama = await getLlama();
const model = await llama.loadModel({
    modelPath: path.join(__dirname, "my-model.gguf")
});

// embedding contexts are separate from chat contexts
const context = await model.createEmbeddingContext();

const text = "Hello world";
console.log("Text:", text);

const embedding = await context.getEmbeddingFor(text);
console.log("Embedding vector:", embedding.vector);
```
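Embeddings are usually compared by similarity; a minimal sketch using plain cosine similarity over the returned vectors (the `cosineSimilarity` helper below is illustrative, not part of the library):

```typescript
// cosine similarity between two vectors: dot(a, b) / (|a| * |b|)
function cosineSimilarity(
    a: readonly number[], b: readonly number[]
) {
    let dot = 0, normA = 0, normB = 0;
    for (let i = 0; i < a.length; i++) {
        dot += a[i] * b[i];
        normA += a[i] ** 2;
        normB += b[i] ** 2;
    }
    return dot / (Math.sqrt(normA) * Math.sqrt(normB));
}

const embedding2 = await context.getEmbeddingFor("Hi there");
console.log(
    "Similarity:",
    cosineSimilarity(embedding.vector, embedding2.vector)
);
```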
Force a model response to follow your JSON schema

```typescript
import {fileURLToPath} from "url";
import path from "path";
import {getLlama, LlamaChatSession} from "node-llama-cpp";

const __dirname = path.dirname(
    fileURLToPath(import.meta.url)
);

const llama = await getLlama();
const model = await llama.loadModel({
    modelPath: path.join(__dirname, "my-model.gguf")
});
const context = await model.createContext();
const session = new LlamaChatSession({
    contextSequence: context.getSequence()
});

// build a grammar from a JSON schema to constrain the response
const grammar = await llama.createGrammarForJsonSchema({
    type: "object",
    properties: {
        positiveWordsInUserMessage: {
            type: "array",
            items: {
                type: "string"
            }
        },
        userMessagePositivityScoreFromOneToTen: {
            enum: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
        },
        nameOfUser: {
            oneOf: [{
                type: "null"
            }, {
                type: "string"
            }]
        }
    }
});

const prompt = "Hi there! I'm John. Nice to meet you!";
const res = await session.prompt(prompt, {grammar});
const parsedRes = grammar.parse(res);

console.log("User name:", parsedRes.nameOfUser);
console.log(
    "Positive words in user message:",
    parsedRes.positiveWordsInUserMessage
);
console.log(
    "User message positivity score:",
    parsedRes.userMessagePositivityScoreFromOneToTen
);
```
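The grammar constrains token sampling during generation, so the response is valid JSON matching the schema, and `grammar.parse(res)` returns it with a TypeScript type derived from that schema.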
Let a model call functions to retrieve data or perform actions

```typescript
import {fileURLToPath} from "url";
import path from "path";
import {
    getLlama, LlamaChatSession, defineChatSessionFunction
} from "node-llama-cpp";

const __dirname = path.dirname(
    fileURLToPath(import.meta.url)
);

const llama = await getLlama();
const model = await llama.loadModel({
    modelPath: path.join(__dirname, "my-model.gguf")
});
const context = await model.createContext();
const session = new LlamaChatSession({
    contextSequence: context.getSequence()
});

const fruitPrices: Record<string, string> = {
    "apple": "$6",
    "banana": "$4"
};

// functions the model is allowed to call while answering
const functions = {
    getFruitPrice: defineChatSessionFunction({
        description: "Get the price of a fruit",
        params: {
            type: "object",
            properties: {
                name: {
                    type: "string"
                }
            }
        },
        async handler(params) {
            const name = params.name.toLowerCase();
            if (Object.keys(fruitPrices).includes(name))
                return {
                    name: name,
                    price: fruitPrices[name]
                };

            return `Unrecognized fruit "${params.name}"`;
        }
    })
};

const q1 = "Is an apple more expensive than a banana?";
console.log("User: " + q1);

const a1 = await session.prompt(q1, {functions});
console.log("AI: " + a1);
```
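When the model decides to call `getFruitPrice`, the handler runs, its return value is fed back into the context, and the model uses that result to compose its final answer.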