yaze 0.3.2
Link to the Past ROM Editor
 
Loading...
Searching...
No Matches
ollama_ai_service.cc
Go to the documentation of this file.
2
3#include <cstdlib>
4#include <iostream>
5
6#include "absl/strings/str_cat.h"
7#include "absl/strings/str_format.h"
8#include "absl/time/clock.h"
9#include "absl/time/time.h"
12
13#ifdef YAZE_WITH_JSON
14#include "httplib.h"
15#include "nlohmann/json.hpp"
16#endif
17
18namespace yaze {
19namespace cli {
20
21OllamaAIService::OllamaAIService(const OllamaConfig& config) : config_(config) {
22 // Load command documentation into prompt builder
23 if (auto status = prompt_builder_.LoadResourceCatalogue(""); !status.ok()) {
24 std::cerr << "⚠️ Failed to load agent prompt catalogue: "
25 << status.message() << std::endl;
26 }
27
28 if (config_.system_prompt.empty()) {
29 // Use enhanced prompting by default
30 if (config_.use_enhanced_prompting) {
31 config_.system_prompt =
32 prompt_builder_.BuildSystemInstructionWithExamples();
33 } else {
34 config_.system_prompt = BuildSystemPrompt();
35 }
36 }
37}
38
std::string OllamaAIService::BuildSystemPrompt() {
  // Fallback prompt if enhanced prompting is disabled.
  // Delegates to PromptBuilder's basic system instruction (no few-shot
  // examples), keeping prompt text in one place.
  return prompt_builder_.BuildSystemInstruction();
}
44
// Forwards the ROM pointer to the prompt builder so generated prompts can
// incorporate ROM-specific context.
// NOTE(review): raw-pointer pass-through — assumes the caller keeps `rom`
// alive for as long as prompts are built; confirm lifetime at call sites.
void OllamaAIService::SetRomContext(Rom* rom) {
  prompt_builder_.SetRom(rom);
}
48
49absl::Status OllamaAIService::CheckAvailability() {
50#ifndef YAZE_WITH_JSON
51 return absl::UnimplementedError(
52 "Ollama service requires JSON support. "
53 "Build with -DZ3ED_AI=ON or -DYAZE_WITH_JSON=ON");
54#else
55 try {
56 httplib::Client cli(config_.base_url);
57 cli.set_connection_timeout(5); // 5 second timeout
58
59 auto res = cli.Get("/api/tags");
60 if (!res) {
61 return absl::UnavailableError(
62 absl::StrFormat("Cannot connect to Ollama server at %s.\n"
63 "Make sure Ollama is installed and running:\n"
64 " 1. Install: brew install ollama (macOS) or "
65 "https://ollama.com/download\n"
66 " 2. Start: ollama serve\n"
67 " 3. Verify: curl http://localhost:11434/api/tags",
68 config_.base_url));
69 }
70
71 if (res->status != 200) {
72 return absl::InternalError(
73 absl::StrFormat("Ollama server error: HTTP %d\nResponse: %s",
74 res->status, res->body));
75 }
76
77 // Check if requested model is available
78 nlohmann::json models_json = nlohmann::json::parse(res->body);
79 bool model_found = false;
80
81 if (models_json.contains("models") && models_json["models"].is_array()) {
82 for (const auto& model : models_json["models"]) {
83 if (model.contains("name")) {
84 std::string model_name = model["name"].get<std::string>();
85 if (model_name.find(config_.model) != std::string::npos) {
86 model_found = true;
87 break;
88 }
89 }
90 }
91 }
92
93 if (!model_found) {
94 return absl::NotFoundError(
95 absl::StrFormat("Model '%s' not found on Ollama server.\n"
96 "Pull it with: ollama pull %s\n"
97 "Available models: ollama list",
98 config_.model, config_.model));
99 }
100
101 return absl::OkStatus();
102 } catch (const std::exception& e) {
103 return absl::InternalError(
104 absl::StrCat("Ollama health check failed: ", e.what()));
105 }
106#endif
107}
108
109absl::StatusOr<std::vector<ModelInfo>> OllamaAIService::ListAvailableModels() {
110#ifndef YAZE_WITH_JSON
111 return absl::UnimplementedError("Requires httplib and JSON support");
112#else
113 try {
114 httplib::Client cli(config_.base_url);
115 cli.set_connection_timeout(5);
116
117 auto res = cli.Get("/api/tags");
118
119 if (!res || res->status != 200) {
120 return absl::UnavailableError(
121 "Cannot list Ollama models. Is the server running?");
122 }
123
124 nlohmann::json models_json = nlohmann::json::parse(res->body);
125 std::vector<ModelInfo> models;
126
127 if (models_json.contains("models") && models_json["models"].is_array()) {
128 for (const auto& model : models_json["models"]) {
129 ModelInfo info;
130 info.provider = kProviderOllama;
131 info.is_local = true;
132
133 if (model.contains("name") && model["name"].is_string()) {
134 info.name = model["name"].get<std::string>();
135 info.display_name = info.name;
136 }
137
138 if (model.contains("size")) {
139 if (model["size"].is_string()) {
140 info.size_bytes = std::strtoull(
141 model["size"].get<std::string>().c_str(), nullptr, 10);
142 } else if (model["size"].is_number_unsigned()) {
143 info.size_bytes = model["size"].get<uint64_t>();
144 }
145 }
146
147 if (model.contains("details") && model["details"].is_object()) {
148 const auto& details = model["details"];
149 info.parameter_size = details.value("parameter_size", "");
150 info.quantization = details.value("quantization_level", "");
151 info.family = details.value("family", "");
152
153 // Build description
154 std::string desc;
155 if (!info.family.empty())
156 desc += info.family + " ";
157 if (!info.parameter_size.empty())
158 desc += info.parameter_size + " ";
159 if (!info.quantization.empty())
160 desc += "(" + info.quantization + ")";
161 info.description = desc;
162 }
163 models.push_back(std::move(info));
164 }
165 }
166
167 return models;
168 } catch (const std::exception& e) {
169 return absl::InternalError(
170 absl::StrCat("Failed to list models: ", e.what()));
171 }
172#endif
173}
174
175absl::StatusOr<std::string> OllamaAIService::ParseOllamaResponse(
176 const std::string& json_response) {
177#if !YAZE_HAS_JSON
178 return absl::UnimplementedError("Requires JSON support");
179#else
180 try {
181 nlohmann::json response_json = nlohmann::json::parse(json_response);
182
183 if (!response_json.contains("response")) {
184 return absl::InvalidArgumentError(
185 "Ollama response missing 'response' field");
186 }
187
188 return response_json["response"].get<std::string>();
189 } catch (const nlohmann::json::exception& e) {
190 return absl::InternalError(
191 absl::StrCat("Failed to parse Ollama response: ", e.what()));
192 }
193#endif
194}
195
// Convenience overload: wraps a single user prompt into a one-message
// history (timestamped now) and defers to the history-based implementation.
absl::StatusOr<AgentResponse> OllamaAIService::GenerateResponse(
    const std::string& prompt) {
  return GenerateResponse(
      {{{agent::ChatMessage::Sender::kUser, prompt, absl::Now()}}});
}
201
202absl::StatusOr<AgentResponse> OllamaAIService::GenerateResponse(
203 const std::vector<agent::ChatMessage>& history) {
204#ifndef YAZE_WITH_JSON
205 return absl::UnimplementedError(
206 "Ollama service requires httplib and JSON support. "
207 "Install vcpkg dependencies or use bundled libraries.");
208#else
209 if (history.empty()) {
210 return absl::InvalidArgumentError("History cannot be empty.");
211 }
212
213 nlohmann::json messages = nlohmann::json::array();
214 for (const auto& chat_msg : history) {
215 if (chat_msg.is_internal) {
216 continue;
217 }
218 nlohmann::json entry;
219 entry["role"] = chat_msg.sender == agent::ChatMessage::Sender::kUser
220 ? "user"
221 : "assistant";
222 entry["content"] = chat_msg.message;
223 messages.push_back(std::move(entry));
224 }
225
226 if (messages.empty()) {
227 return absl::InvalidArgumentError(
228 "History does not contain any user/assistant messages.");
229 }
230
231 std::string fallback_prompt = prompt_builder_.BuildPromptFromHistory(history);
232
233 nlohmann::json request_body;
234 request_body["model"] = config_.model;
235 request_body["system"] = config_.system_prompt;
236 request_body["stream"] = config_.stream;
237 request_body["format"] = "json";
238
239 if (config_.use_chat_completions) {
240 request_body["messages"] = messages;
241 } else {
242 request_body["prompt"] = fallback_prompt;
243 }
244
245 nlohmann::json options = {{"temperature", config_.temperature},
246 {"top_p", config_.top_p},
247 {"top_k", config_.top_k},
248 {"num_predict", config_.max_tokens},
249 {"num_ctx", config_.num_ctx}};
250 request_body["options"] = options;
251
252 AgentResponse agent_response;
253 agent_response.provider = kProviderOllama;
254
255 try {
256 httplib::Client cli(config_.base_url);
257 cli.set_read_timeout(60); // Longer timeout for inference
258
259 const char* endpoint =
260 config_.use_chat_completions ? "/api/chat" : "/api/generate";
261 absl::Time request_start = absl::Now();
262 auto res = cli.Post(endpoint, request_body.dump(), "application/json");
263
264 if (!res) {
265 return absl::UnavailableError(
266 "Failed to connect to Ollama. Is 'ollama serve' running?\n"
267 "Start with: ollama serve");
268 }
269
270 if (res->status != 200) {
271 return absl::InternalError(absl::StrFormat(
272 "Ollama API error: HTTP %d\nResponse: %s", res->status, res->body));
273 }
274
275 // Parse Ollama's wrapper JSON
276 nlohmann::json ollama_wrapper;
277 try {
278 ollama_wrapper = nlohmann::json::parse(res->body);
279 } catch (const nlohmann::json::exception& e) {
280 return absl::InternalError(
281 absl::StrFormat("Failed to parse Ollama response: %s\nBody: %s",
282 e.what(), res->body));
283 }
284
285 // Extract the LLM's response from Ollama's "response" field
286 // For chat completions API, it's inside "message" -> "content"
287 std::string llm_output;
288 if (config_.use_chat_completions) {
289 if (ollama_wrapper.contains("message") &&
290 ollama_wrapper["message"].is_object() &&
291 ollama_wrapper["message"].contains("content")) {
292 llm_output = ollama_wrapper["message"]["content"].get<std::string>();
293 } else {
294 return absl::InvalidArgumentError(
295 "Ollama chat response missing 'message.content'");
296 }
297 } else {
298 if (ollama_wrapper.contains("response") &&
299 ollama_wrapper["response"].is_string()) {
300 llm_output = ollama_wrapper["response"].get<std::string>();
301 } else {
302 return absl::InvalidArgumentError(
303 "Ollama response missing 'response' field");
304 }
305 }
306
307 // Debug: Print raw LLM output when verbose mode is enabled
308 const char* verbose_env = std::getenv("Z3ED_VERBOSE");
309 if (verbose_env && std::string(verbose_env) == "1") {
310 std::cout << "\n"
311 << "\033[35m"
312 << "🔍 Raw LLM Response:"
313 << "\033[0m"
314 << "\n"
315 << "\033[2m" << llm_output << "\033[0m"
316 << "\n\n";
317 }
318
319 // Parse the LLM's JSON response (the agent structure)
320 nlohmann::json response_json;
321 try {
322 response_json = nlohmann::json::parse(llm_output);
323 } catch (const nlohmann::json::exception& e) {
324 // Sometimes the LLM includes extra text - try to extract JSON object
325 size_t start = llm_output.find('{');
326 size_t end = llm_output.rfind('}');
327
328 if (start != std::string::npos && end != std::string::npos &&
329 end > start) {
330 std::string json_only = llm_output.substr(start, end - start + 1);
331 try {
332 response_json = nlohmann::json::parse(json_only);
333 } catch (const nlohmann::json::exception&) {
334 agent_response.warnings.push_back(
335 "LLM response was not valid JSON; returning raw text.");
336 agent_response.text_response = llm_output;
337 return agent_response;
338 }
339 } else {
340 agent_response.warnings.push_back(
341 "LLM response did not contain a JSON object; returning raw text.");
342 agent_response.text_response = llm_output;
343 return agent_response;
344 }
345 }
346
347 agent_response.model = ollama_wrapper.value("model", config_.model);
348 agent_response.latency_seconds =
349 absl::ToDoubleSeconds(absl::Now() - request_start);
350 agent_response.parameters["temperature"] =
351 absl::StrFormat("%.2f", config_.temperature);
352 agent_response.parameters["top_p"] = absl::StrFormat("%.2f", config_.top_p);
353 agent_response.parameters["top_k"] = absl::StrFormat("%d", config_.top_k);
354 agent_response.parameters["num_predict"] =
355 absl::StrFormat("%d", config_.max_tokens);
356 agent_response.parameters["num_ctx"] =
357 absl::StrFormat("%d", config_.num_ctx);
358 agent_response.parameters["endpoint"] = endpoint;
359 if (response_json.contains("text_response") &&
360 response_json["text_response"].is_string()) {
361 agent_response.text_response =
362 response_json["text_response"].get<std::string>();
363 }
364 if (response_json.contains("reasoning") &&
365 response_json["reasoning"].is_string()) {
366 agent_response.reasoning = response_json["reasoning"].get<std::string>();
367 }
368 if (response_json.contains("tool_calls") &&
369 response_json["tool_calls"].is_array()) {
370 for (const auto& call : response_json["tool_calls"]) {
371 if (call.contains("tool_name") && call["tool_name"].is_string()) {
372 ToolCall tool_call;
373 tool_call.tool_name = call["tool_name"].get<std::string>();
374 if (call.contains("args") && call["args"].is_object()) {
375 for (auto& [key, value] : call["args"].items()) {
376 if (value.is_string()) {
377 tool_call.args[key] = value.get<std::string>();
378 }
379 }
380 }
381 agent_response.tool_calls.push_back(tool_call);
382 }
383 }
384 }
385 if (response_json.contains("commands") &&
386 response_json["commands"].is_array()) {
387 for (const auto& cmd : response_json["commands"]) {
388 if (cmd.is_string()) {
389 agent_response.commands.push_back(cmd.get<std::string>());
390 }
391 }
392 }
393
394 return agent_response;
395
396 } catch (const std::exception& e) {
397 return absl::InternalError(
398 absl::StrCat("Ollama request failed: ", e.what()));
399 }
400#endif
401}
402
403} // namespace cli
404} // namespace yaze
OllamaAIService(const OllamaConfig &)
constexpr char kProviderOllama[]
Definition provider_ids.h:8