9#include "absl/strings/ascii.h"
10#include "absl/strings/match.h"
11#include "absl/strings/str_format.h"
12#include "absl/strings/str_join.h"
// Escapes a string so it can be embedded safely in JSON output.
// NOTE(review): interior lines are missing from this view; the visible
// fragment writes a "\uXXXX" escape (4 hex digits, zero-padded) for a
// character c into a stream ss — presumably for control characters.
std::string
EscapeJson(
const std::string& str) {
49 ss <<
"\\u" << std::hex << std::setw(4) << std::setfill(
'0')
50 <<
static_cast<int>(c);
// Converts an agent chat history into the Gemini "contents" array and
// returns it serialized as a JSON string (parsed again by the caller).
// NOTE(review): the lines assigning each content's "role" are missing from
// this view — confirm user/model role mapping against the full source.
61std::string ConvertHistoryToGeminiFormat(
62 const std::vector<agent::ChatMessage>& history) {
63 nlohmann::json contents = nlohmann::json::array();
65 for (
const auto& msg : history) {
// One part per message carrying its text.
67 part[
"text"] = msg.message;
69 nlohmann::json content;
70 content[
"parts"] = nlohmann::json::array({part});
74 contents.push_back(content);
77 return contents.dump();
// Canonicalizes a provider name: lowercases it and (in lines not visible
// here) maps aliases/empty input to a canonical provider constant.
88std::string NormalizeBrowserProvider(std::string provider) {
89 provider = absl::AsciiStrToLower(provider);
90 if (provider.empty()) {
106bool IsLikelyLocalApiBase(
const std::string& base) {
107 const std::string lower = absl::AsciiStrToLower(base);
108 return absl::StrContains(lower,
"localhost") ||
109 absl::StrContains(lower,
"127.0.0.1") ||
110 absl::StrContains(lower,
"0.0.0.0") || absl::StrContains(lower,
"::1");
// Derives a model "family" prefix from a model name by cutting at the first
// '/' or '-' (e.g. "gpt-4o" -> "gpt"). Since std::string::npos is the
// maximum size_t, std::min picks whichever delimiter occurs first, if any.
// NOTE(review): the empty-name and no-delimiter return paths are missing
// from this view — confirm their values against the full source.
113std::string InferModelFamily(
const std::string& model_name) {
114 if (model_name.empty()) {
117 const size_t slash = model_name.find(
'/');
118 const size_t dash = model_name.find(
'-');
119 const size_t delimiter = std::min(slash, dash);
120 if (delimiter != std::string::npos) {
121 return model_name.substr(0, delimiter);
// Prepends the currently configured model to |models| (as a fallback entry)
// unless it is already listed. No-op when |models| is null or the name is
// empty. The early-return bodies are elided in this view.
126void AddCurrentModelFallback(std::vector<ModelInfo>* models,
127 const std::string& provider,
128 const std::string& model_name,
bool is_local,
129 const std::string& description) {
130 if (!models || model_name.empty()) {
// Skip insertion if the model is already present.
133 for (
const auto& model : *models) {
134 if (model.name == model_name) {
// Insert at the front so the configured model is offered first.
138 models->insert(models->begin(), {.name = model_name,
139 .display_name = model_name,
140 .provider = provider,
141 .description = description,
142 .family = InferModelFamily(model_name),
143 .is_local = is_local});
// Constructs the service: normalizes the provider name and fills in
// per-provider defaults for model and API base when unset. Takes ownership
// of |http_client|; logs a warning when none is supplied.
148BrowserAIService::BrowserAIService(
149 const BrowserAIConfig& config,
150 std::unique_ptr<net::IHttpClient> http_client)
152 base_system_instruction_(config.system_instruction),
153 http_client_(std::move(http_client)) {
155 config_.provider = NormalizeBrowserProvider(config_.provider);
// Per-provider defaults; only empty fields are filled in.
157 if (config_.provider == kProviderOpenAi) {
158 if (config_.model.empty()) {
159 config_.model =
"gpt-4o-mini";
161 if (config_.api_base.empty()) {
162 config_.api_base = kOpenAIApiBaseUrl;
164 }
else if (config_.provider == kProviderLmStudio) {
165 if (config_.api_base.empty()) {
166 config_.api_base =
"http://localhost:1234/v1";
168 }
else if (config_.provider == kProviderHalext ||
169 config_.provider == kProviderAfsBridge) {
170 if (config_.api_base.empty()) {
171 config_.api_base =
"https://halext.org/v1";
174 if (config_.model.empty()) {
175 config_.model =
"gemini-2.5-flash";
181 LogDebug(
"Warning: No HTTP client provided to BrowserAIService");
186 http_client_->SetTimeout(config_.timeout_seconds);
189 LogDebug(absl::StrFormat(
"BrowserAIService initialized with model: %s",
// Installs |rom| as context and rebuilds the system instruction: starts from
// the base instruction and, when a ROM is loaded, appends a ROM-specific
// context paragraph. Thread-safe via mutex_.
193void BrowserAIService::SetRomContext(Rom* rom) {
194 std::lock_guard<std::mutex> lock(mutex_);
// Always reset to the base instruction before appending context.
196 config_.system_instruction = base_system_instruction_;
197 if (rom_ && rom_->is_loaded()) {
198 const std::string rom_context = absl::StrFormat(
199 "The ROM file '%s' is currently loaded. Tailor advice to the active "
200 "project and loaded data.",
202 if (config_.system_instruction.empty()) {
203 config_.system_instruction =
204 "You are assisting with ROM hacking for a Zelda SNES project. " +
207 config_.system_instruction =
208 config_.system_instruction +
"\n\n" + rom_context;
// Generates a single-turn response for |prompt|. Validates the HTTP client
// and API-key preconditions, builds a provider-specific request (OpenAI
// chat/completions vs Gemini generateContent), POSTs it, and parses the
// provider-specific response. Thread-safe via mutex_.
213absl::StatusOr<AgentResponse> BrowserAIService::GenerateResponse(
214 const std::string& prompt) {
215 std::lock_guard<std::mutex> lock(mutex_);
217 return absl::FailedPreconditionError(
"HTTP client not initialized");
// Remote endpoints need a key; local OpenAI-compatible servers do not.
220 if (RequiresApiKey() && config_.api_key.empty()) {
222 return absl::InvalidArgumentError(
223 "OpenAI-compatible API key not set. Provide a key for remote "
224 "endpoints, or use a local OpenAI-compatible server.");
226 return absl::InvalidArgumentError(
227 "API key not set. Please provide a Gemini API key.");
230 LogDebug(absl::StrFormat(
"Generating response for prompt: %s", prompt));
233 std::string url = BuildApiUrl(
"generateContent");
236 std::string request_body;
// OpenAI-compatible path overrides the URL with <base>/chat/completions.
238 url = config_.api_base.empty() ? kOpenAIApiBaseUrl : config_.api_base;
239 url +=
"/chat/completions";
240 request_body = BuildOpenAIRequestBody(prompt,
nullptr);
242 request_body = BuildRequestBody(prompt);
246 net::Headers headers;
247 headers[
"Content-Type"] =
"application/json";
249 !config_.api_key.empty()) {
250 headers[
"Authorization"] =
"Bearer " + config_.api_key;
254 auto response_or = http_client_->Post(url, request_body, headers);
255 if (!response_or.ok()) {
256 return absl::InternalError(absl::StrFormat(
"Failed to make API request: %s",
257 response_or.status().message()));
260 const auto& response = response_or.value();
// 4xx becomes InvalidArgument; 5xx becomes Internal.
263 if (!response.IsSuccess()) {
264 if (response.IsClientError()) {
265 return absl::InvalidArgumentError(
266 absl::StrFormat(
"API request failed with status %d: %s",
267 response.status_code, response.body));
269 return absl::InternalError(absl::StrFormat(
270 "API server error %d: %s", response.status_code, response.body));
276 return ParseOpenAIResponse(response.body);
278 return ParseGeminiResponse(response.body);
// Multi-turn overload: generates a response from a full chat history.
// Mirrors the single-prompt overload's precondition checks, then builds the
// Gemini request inline (contents from ConvertHistoryToGeminiFormat, plus
// generationConfig and optional systemInstruction) or delegates to
// BuildOpenAIRequestBody for OpenAI-compatible providers.
281absl::StatusOr<AgentResponse> BrowserAIService::GenerateResponse(
282 const std::vector<agent::ChatMessage>& history) {
283 std::lock_guard<std::mutex> lock(mutex_);
285 return absl::FailedPreconditionError(
"HTTP client not initialized");
288 if (RequiresApiKey() && config_.api_key.empty()) {
290 return absl::InvalidArgumentError(
291 "OpenAI-compatible API key not set. Provide a key for remote "
292 "endpoints, or use a local OpenAI-compatible server.");
294 return absl::InvalidArgumentError(
295 "API key not set. Please provide a Gemini API key.");
298 if (history.empty()) {
299 return absl::InvalidArgumentError(
"Chat history cannot be empty");
303 absl::StrFormat(
"Generating response from %zu messages", history.size()));
306 std::string url = BuildApiUrl(
"generateContent");
308 std::string request_body;
310 url = config_.api_base.empty() ? kOpenAIApiBaseUrl : config_.api_base;
311 url +=
"/chat/completions";
312 request_body = BuildOpenAIRequestBody(
"", &history);
// Gemini path: build the request JSON here rather than via a helper.
315 nlohmann::json request;
316 request[
"contents"] =
317 nlohmann::json::parse(ConvertHistoryToGeminiFormat(history));
320 request[
"generationConfig"][
"temperature"] = config_.temperature;
321 request[
"generationConfig"][
"maxOutputTokens"] = config_.max_output_tokens;
324 if (!config_.system_instruction.empty()) {
325 request[
"systemInstruction"][
"parts"][0][
"text"] =
326 config_.system_instruction;
329 request_body = request.dump();
333 net::Headers headers;
334 headers[
"Content-Type"] =
"application/json";
336 !config_.api_key.empty()) {
337 headers[
"Authorization"] =
"Bearer " + config_.api_key;
341 auto response_or = http_client_->Post(url, request_body, headers);
342 if (!response_or.ok()) {
343 return absl::InternalError(absl::StrFormat(
"Failed to make API request: %s",
344 response_or.status().message()));
347 const auto& response = response_or.value();
// NOTE(review): unlike the single-prompt overload, all HTTP failures here
// map to InternalError (no client-error distinction) — confirm intended.
350 if (!response.IsSuccess()) {
351 return absl::InternalError(
352 absl::StrFormat(
"API request failed with status %d: %s",
353 response.status_code, response.body));
358 return ParseOpenAIResponse(response.body);
360 return ParseGeminiResponse(response.body);
// Lists models for the active provider. For OpenAI-compatible endpoints it
// queries GET <base>/models and falls back to the configured model plus a
// static catalog for official OpenAI; for Gemini it returns a static
// catalog. Thread-safe via mutex_.
363absl::StatusOr<std::vector<ModelInfo>> BrowserAIService::ListAvailableModels() {
364 std::lock_guard<std::mutex> lock(mutex_);
365 std::vector<ModelInfo> models;
367 const std::string provider =
371 const std::string base = GetOpenAIApiBase();
372 const bool is_local = IsLikelyLocalApiBase(base);
373 const bool is_official_openai =
376 net::Headers headers;
377 if (!config_.api_key.empty()) {
378 headers[
"Authorization"] =
"Bearer " + config_.api_key;
380 auto response_or = http_client_->Get(base +
"/models", headers);
381 if (response_or.ok() && response_or->IsSuccess()) {
// Non-throwing parse: returns a discarded value on malformed JSON.
382 auto json = nlohmann::json::parse(response_or->body,
nullptr,
false);
383 if (
json.is_object() &&
json.contains(
"data") &&
384 json[
"data"].is_array()) {
385 for (
const auto& entry :
json[
"data"]) {
386 if (!entry.is_object() || !entry.contains(
"id") ||
387 !entry[
"id"].is_string()) {
390 const std::string
id = entry[
"id"].get<std::string>();
392 is_local ?
"Discovered from local OpenAI-compatible endpoint"
393 :
"Discovered from OpenAI-compatible endpoint";
394 if (entry.contains(
"owned_by") && entry[
"owned_by"].is_string()) {
395 const std::string owner = entry[
"owned_by"].get<std::string>();
396 description = absl::StrFormat(
"Owned by %s", owner);
398 models.push_back({.name = id,
400 .provider = provider,
402 .family = InferModelFamily(
id),
403 .is_local = is_local});
405 if (!models.empty()) {
406 AddCurrentModelFallback(&models, provider, config_.model, is_local,
408 ?
"Configured local model"
409 :
"Configured OpenAI-compatible model");
// Discovery failed or returned nothing: still surface the configured model.
416 if (!is_local || !config_.model.empty()) {
417 AddCurrentModelFallback(&models, provider, config_.model, is_local,
418 is_local ?
"Configured local model"
419 :
"Configured OpenAI-compatible model");
421 if (is_official_openai) {
422 models.push_back({.name =
"gpt-4o-mini",
423 .display_name =
"GPT-4o Mini",
424 .provider = provider,
425 .description =
"Fast/cheap OpenAI model",
428 models.push_back({.name =
"gpt-4o",
429 .display_name =
"GPT-4o",
430 .provider = provider,
431 .description =
"Balanced OpenAI flagship model",
434 models.push_back({.name =
"gpt-4.1-mini",
435 .display_name =
"GPT-4.1 Mini",
436 .provider = provider,
437 .description =
"Lightweight 4.1 variant",
// NOTE(review): name "gemini-2.5-flash" but display_name says
// "Gemini 2.0 Flash (Experimental)" — one of the two looks stale; confirm.
443 {.name =
"gemini-2.5-flash",
444 .display_name =
"Gemini 2.0 Flash (Experimental)",
446 .description =
"Fastest Gemini model with experimental features",
450 models.push_back({.name =
"gemini-1.5-flash",
451 .display_name =
"Gemini 1.5 Flash",
453 .description =
"Fast and efficient for most tasks",
457 models.push_back({.name =
"gemini-1.5-flash-8b",
458 .display_name =
"Gemini 1.5 Flash 8B",
460 .description =
"Smaller, faster variant of Flash",
462 .parameter_size =
"8B",
465 models.push_back({.name =
"gemini-1.5-pro",
466 .display_name =
"Gemini 1.5 Pro",
468 .description =
"Most capable model for complex tasks",
// Analyzes an image (data URL or raw base64) with |prompt| via the Gemini
// multimodal endpoint. OpenAI-compatible providers are rejected as
// unimplemented. Strips any "data:...," prefix and infers the MIME type
// from the data URL header (defaults to image/png).
476absl::StatusOr<AgentResponse> BrowserAIService::AnalyzeImage(
477 const std::string& image_data,
const std::string& prompt) {
478 std::lock_guard<std::mutex> lock(mutex_);
480 return absl::FailedPreconditionError(
"HTTP client not initialized");
484 return absl::UnimplementedError(
485 "Image analysis not yet supported for OpenAI-compatible providers in "
489 if (config_.api_key.empty()) {
490 return absl::InvalidArgumentError(
491 "API key not set. Please provide a Gemini API key.");
494 LogDebug(absl::StrFormat(
"Analyzing image with prompt: %s", prompt));
497 std::string url = BuildApiUrl(
"generateContent");
500 std::string mime_type =
"image/png";
501 if (image_data.find(
"data:image/jpeg") == 0 ||
502 image_data.find(
"data:image/jpg") == 0) {
503 mime_type =
"image/jpeg";
// Drop the "data:<mime>;base64," prefix so only base64 payload remains.
507 std::string clean_image_data = image_data;
508 size_t comma_pos = image_data.find(
',');
509 if (comma_pos != std::string::npos && image_data.find(
"data:") == 0) {
510 clean_image_data = image_data.substr(comma_pos + 1);
514 std::string request_body =
515 BuildMultimodalRequestBody(prompt, clean_image_data, mime_type);
518 net::Headers headers;
519 headers[
"Content-Type"] =
"application/json";
522 auto response_or = http_client_->Post(url, request_body, headers);
523 if (!response_or.ok()) {
524 return absl::InternalError(absl::StrFormat(
"Failed to make API request: %s",
525 response_or.status().message()));
528 const auto& response = response_or.value();
531 if (!response.IsSuccess()) {
532 return absl::InternalError(
533 absl::StrFormat(
"API request failed with status %d: %s",
534 response.status_code, response.body));
538 return ParseGeminiResponse(response.body);
// Probes the configured provider with a lightweight GET to verify the
// endpoint is reachable and the API key (when required) is accepted.
// 401/403 map to PermissionDenied; other failures to Unavailable.
541absl::Status BrowserAIService::CheckAvailability() {
542 std::lock_guard<std::mutex> lock(mutex_);
544 return absl::FailedPreconditionError(
"HTTP client not initialized");
547 if (RequiresApiKey() && config_.api_key.empty()) {
548 if (config_.provider == kProviderOpenAi) {
549 return absl::InvalidArgumentError(
550 "OpenAI API key not set. Provide a key for https://api.openai.com, "
551 "or use a local OpenAI-compatible endpoint.");
553 return absl::InvalidArgumentError(
"Gemini API key not set");
556 net::Headers headers;
560 url = GetOpenAIApiBase();
562 if (!config_.api_key.empty()) {
563 headers[
"Authorization"] =
"Bearer " + config_.api_key;
// Gemini passes the key as a query parameter instead of a header.
566 url = absl::StrFormat(
"%s%s?key=%s", kGeminiApiBaseUrl, config_.model,
570 auto response_or = http_client_->Get(url, headers);
572 if (!response_or.ok()) {
573 return absl::UnavailableError(
574 absl::StrFormat(
"Cannot reach %s API: %s", config_.provider,
575 response_or.status().message()));
578 const auto& response = response_or.value();
579 if (!response.IsSuccess()) {
580 if (response.status_code == 401 || response.status_code == 403) {
581 return absl::PermissionDeniedError(
"Invalid API key");
583 return absl::UnavailableError(absl::StrFormat(
584 "%s API returned error %d", config_.provider, response.status_code));
587 return absl::OkStatus();
// Replaces the stored API key under the service lock.
590void BrowserAIService::UpdateApiKey(
const std::string& api_key) {
591 std::lock_guard<std::mutex> lock(mutex_);
592 config_.api_key = api_key;
596 LogDebug(
"API key updated");
// True when the endpoint is remote (non-loopback) and therefore needs a key.
// NOTE(review): a line appears to be elided before the return — possibly a
// provider check; confirm against the full source.
599bool BrowserAIService::RequiresApiKey()
const {
601 return !IsLikelyLocalApiBase(GetOpenAIApiBase());
// Returns the effective OpenAI-compatible base URL (configured value or the
// default) with any trailing '/' stripped.
606std::string BrowserAIService::GetOpenAIApiBase()
const {
608 config_.api_base.empty() ? kOpenAIApiBaseUrl : config_.api_base;
609 if (!base.empty() && base.back() ==
'/') {
// Builds the request URL for |endpoint|: "<base>/<endpoint>" for
// OpenAI-compatible providers, or the Gemini
// "<base><model>:<endpoint>?key=<key>" form otherwise.
615std::string BrowserAIService::BuildApiUrl(
const std::string& endpoint)
const {
617 std::string base = GetOpenAIApiBase();
618 return absl::StrFormat(
"%s/%s", base, endpoint);
621 return absl::StrFormat(
"%s%s:%s?key=%s", kGeminiApiBaseUrl, config_.model,
622 endpoint, config_.api_key);
625std::string BrowserAIService::BuildRequestBody(
const std::string& prompt,
626 bool include_system)
const {
627 nlohmann::json request;
630 nlohmann::json user_part;
631 user_part[
"text"] = prompt;
633 nlohmann::json user_content;
634 user_content[
"parts"] = nlohmann::json::array({user_part});
635 user_content[
"role"] =
"user";
637 request[
"contents"] = nlohmann::json::array({user_content});
640 request[
"generationConfig"][
"temperature"] = config_.temperature;
641 request[
"generationConfig"][
"maxOutputTokens"] = config_.max_output_tokens;
644 if (include_system && !config_.system_instruction.empty()) {
645 nlohmann::json system_part;
646 system_part[
"text"] = config_.system_instruction;
647 request[
"systemInstruction"][
"parts"] =
648 nlohmann::json::array({system_part});
651 return request.dump();
654std::string BrowserAIService::BuildMultimodalRequestBody(
655 const std::string& prompt,
const std::string& image_data,
656 const std::string& mime_type)
const {
657 nlohmann::json request;
660 nlohmann::json text_part;
661 text_part[
"text"] = prompt;
663 nlohmann::json image_part;
664 image_part[
"inline_data"][
"mime_type"] = mime_type;
665 image_part[
"inline_data"][
"data"] = image_data;
667 nlohmann::json content;
668 content[
"parts"] = nlohmann::json::array({text_part, image_part});
669 content[
"role"] =
"user";
671 request[
"contents"] = nlohmann::json::array({content});
674 request[
"generationConfig"][
"temperature"] = config_.temperature;
675 request[
"generationConfig"][
"maxOutputTokens"] = config_.max_output_tokens;
678 if (!config_.system_instruction.empty()) {
679 nlohmann::json system_part;
680 system_part[
"text"] = config_.system_instruction;
681 request[
"systemInstruction"][
"parts"] =
682 nlohmann::json::array({system_part});
685 return request.dump();
// Builds an OpenAI chat/completions request. Uses |history| when provided
// and non-empty, otherwise a single user message from |prompt|. The system
// instruction, when configured, is prepended as a "system" message.
// NOTE(review): the role ternary's branch values are elided in this view —
// presumably "user"/"assistant"; confirm against the full source.
688std::string BrowserAIService::BuildOpenAIRequestBody(
689 const std::string& prompt,
690 const std::vector<agent::ChatMessage>* history)
const {
691 nlohmann::json request;
692 request[
"model"] = config_.model.empty() ?
"gpt-4o-mini" : config_.model;
694 nlohmann::json messages = nlohmann::json::array();
695 if (!config_.system_instruction.empty()) {
697 {{
"role",
"system"}, {
"content", config_.system_instruction}});
700 if (history && !history->empty()) {
701 for (
const auto& msg : *history) {
703 {{
"role", msg.sender == agent::ChatMessage::Sender::kUser
706 {
"content", msg.message}});
708 }
else if (!prompt.empty()) {
709 messages.push_back({{
"role",
"user"}, {
"content", prompt}});
712 request[
"messages"] = messages;
713 request[
"temperature"] = config_.temperature;
714 request[
"max_tokens"] = config_.max_output_tokens;
716 return request.dump();
// Parses a Gemini generateContent response body: surfaces API errors,
// extracts candidate text, and collects safety-rating warnings for any
// rating above LOW. JSON parse failures map to InternalError.
719absl::StatusOr<AgentResponse> BrowserAIService::ParseGeminiResponse(
720 const std::string& response_body)
const {
722 nlohmann::json
json = nlohmann::json::parse(response_body);
725 auto error_status = CheckForApiError(json);
726 if (!error_status.ok()) {
731 std::string text_content = ExtractTextFromCandidates(json);
733 if (text_content.empty()) {
734 return absl::InternalError(
"Empty response from Gemini API");
738 AgentResponse response;
739 response.text_response = text_content;
741 response.model = config_.model;
// Attach warnings for non-negligible safety ratings, if reported.
744 if (
json.contains(
"promptFeedback") &&
745 json[
"promptFeedback"].contains(
"safetyRatings")) {
746 for (
const auto& rating :
json[
"promptFeedback"][
"safetyRatings"]) {
747 if (rating.contains(
"probability") &&
748 rating[
"probability"] !=
"NEGLIGIBLE" &&
749 rating[
"probability"] !=
"LOW") {
750 response.warnings.push_back(absl::StrFormat(
751 "Content flagged: %s (%s)", rating.value(
"category",
"unknown"),
752 rating.value(
"probability",
"unknown")));
757 LogDebug(absl::StrFormat(
"Successfully parsed response with %zu characters",
758 text_content.length()));
762 }
catch (
const nlohmann::json::exception& e) {
763 return absl::InternalError(
764 absl::StrFormat(
"Failed to parse Gemini response: %s", e.what()));
// Parses an OpenAI chat/completions response body. Maps embedded "error"
// objects to status codes (401/403 -> Unauthenticated, then a rate-limit
// branch elided in this view, otherwise Internal), validates the
// choices/message/content shape, and fills an AgentResponse.
768absl::StatusOr<AgentResponse> BrowserAIService::ParseOpenAIResponse(
769 const std::string& response_body)
const {
771 nlohmann::json
json = nlohmann::json::parse(response_body);
773 if (
json.contains(
"error")) {
774 const auto& err =
json[
"error"];
775 std::string message = err.value(
"message",
"Unknown error");
776 int code = err.value(
"code", 0);
777 if (code == 401 || code == 403)
778 return absl::UnauthenticatedError(message);
780 return absl::ResourceExhaustedError(message);
781 return absl::InternalError(message);
784 if (!
json.contains(
"choices") || !json[
"choices"].is_array() ||
785 json[
"choices"].empty()) {
786 return absl::InternalError(
"Empty response from OpenAI API");
789 const auto& choice =
json[
"choices"][0];
790 if (!choice.contains(
"message") || !choice[
"message"].contains(
"content")) {
791 return absl::InternalError(
"Malformed OpenAI response");
794 std::string text = choice[
"message"][
"content"].get<std::string>();
796 return absl::InternalError(
"OpenAI returned empty content");
799 AgentResponse response;
800 response.text_response = text;
801 response.provider = config_.provider;
802 response.model = config_.model;
804 }
catch (
const nlohmann::json::exception& e) {
805 return absl::InternalError(
806 absl::StrFormat(
"Failed to parse OpenAI response: %s", e.what()));
// Concatenates the "text" fields of all parts in the first candidate of a
// Gemini response. The empty-shape return paths and the final return are
// elided in this view (presumably returning an empty/accumulated string).
810std::string BrowserAIService::ExtractTextFromCandidates(
811 const nlohmann::json& json)
const {
812 if (!
json.contains(
"candidates") || !json[
"candidates"].is_array() ||
813 json[
"candidates"].empty()) {
// Only the first candidate is consulted.
817 const auto& candidate =
json[
"candidates"][0];
819 if (!candidate.contains(
"content") ||
820 !candidate[
"content"].contains(
"parts") ||
821 !candidate[
"content"][
"parts"].is_array() ||
822 candidate[
"content"][
"parts"].empty()) {
827 for (
const auto& part : candidate[
"content"][
"parts"]) {
828 if (part.contains(
"text")) {
829 result += part[
"text"].get<std::string>();
836absl::Status BrowserAIService::CheckForApiError(
837 const nlohmann::json& json)
const {
838 if (
json.contains(
"error")) {
839 const auto& error =
json[
"error"];
840 int code = error.value(
"code", 0);
841 std::string message = error.value(
"message",
"Unknown error");
842 std::string status = error.value(
"status",
"");
845 if (code == 400 || status ==
"INVALID_ARGUMENT") {
846 return absl::InvalidArgumentError(message);
847 }
else if (code == 401 || status ==
"UNAUTHENTICATED") {
848 return absl::UnauthenticatedError(message);
849 }
else if (code == 403 || status ==
"PERMISSION_DENIED") {
850 return absl::PermissionDeniedError(message);
851 }
else if (code == 429 || status ==
"RESOURCE_EXHAUSTED") {
852 return absl::ResourceExhaustedError(message);
853 }
else if (code == 503 || status ==
"UNAVAILABLE") {
854 return absl::UnavailableError(message);
856 return absl::InternalError(message);
860 return absl::OkStatus();
// Logs |message| to the browser console (via Emscripten EM_ASM /
// UTF8ToString) when verbose logging is enabled.
863void BrowserAIService::LogDebug(
const std::string& message)
const {
864 if (config_.verbose) {
867 { console.log(
'[BrowserAIService] ' + UTF8ToString($0)); },
bool IsOpenAiCompatibleProvider(absl::string_view provider)
std::string EscapeJson(const std::string &input)
constexpr char kProviderGemini[]
constexpr char kProviderGoogle[]
constexpr char kProviderCustomOpenAi[]
constexpr char kProviderHalext[]
constexpr char kProviderGoogleGemini[]
constexpr char kProviderOpenAiCompatible[]
constexpr char kProviderAfsBridge[]
constexpr char kProviderLmStudioDashed[]
constexpr char kProviderOpenAi[]
constexpr char kProviderLmStudio[]
Rom * rom()
Get the current ROM instance.