yaze 0.3.2
Link to the Past ROM Editor
 
Loading...
Searching...
No Matches
message_data.cc
Go to the documentation of this file.
1#include "message_data.h"
2
3#include <cctype>
4#include <fstream>
5#include <optional>
6#include <sstream>
7#include <string>
8
9#include "absl/strings/ascii.h"
10#include "absl/strings/str_format.h"
11#include "absl/strings/str_split.h"
12#include "core/rom_settings.h"
13#include "rom/snes.h"
14#include "rom/write_fence.h"
15#include "util/hex.h"
16#include "util/log.h"
17#include "util/macro.h"
18
19namespace yaze {
20namespace editor {
21
namespace {

// Returns true when `c` is an underscore or is alphanumeric according to
// std::isalnum. The value is widened through unsigned char before the
// <cctype> call so negative `char` values are never passed to it.
bool IsWordChar(char c) {
  if (c == '_') {
    return true;
  }
  return std::isalnum(static_cast<unsigned char>(c)) != 0;
}

// Returns true when the span [pos, pos + len) of `text` is delimited on both
// sides by either the edge of `text` or a non-word character.
bool MatchesWholeWordAt(std::string_view text, size_t pos, size_t len) {
  if (pos != 0 && IsWordChar(text[pos - 1])) {
    return false;
  }
  const size_t end = pos + len;
  return end >= text.size() || !IsWordChar(text[end]);
}

// Returns a copy of `input` with every character passed through std::tolower.
std::string LowercaseCopy(std::string_view input) {
  std::string result;
  result.reserve(input.size());
  for (const char ch : input) {
    result.push_back(
        static_cast<char>(std::tolower(static_cast<unsigned char>(ch))));
  }
  return result;
}

}  // namespace
46
51
56
57uint8_t FindMatchingCharacter(char value) {
58 // CharEncoder contains duplicate glyph mappings (for example, space), so we
59 // choose the lowest byte value to keep reverse lookups deterministic.
60 uint8_t best_match = 0xFF;
61 const wchar_t target =
62 static_cast<wchar_t>(static_cast<unsigned char>(value));
63 for (const auto& [key, char_value] : CharEncoder) {
64 if (char_value != target) {
65 continue;
66 }
67 if (best_match == 0xFF || key < best_match) {
68 best_match = key;
69 }
70 }
71 return best_match;
72}
73
74int8_t FindDictionaryEntry(uint8_t value) {
75 if (value < DICTOFF || value == 0xFF) {
76 return -1;
77 }
78 return value - DICTOFF;
79}
80
81std::optional<TextElement> FindMatchingCommand(uint8_t b) {
82 for (const auto& text_element : TextCommands) {
83 if (text_element.ID == b) {
84 return text_element;
85 }
86 }
87 return std::nullopt;
88}
89
90std::optional<TextElement> FindMatchingSpecial(uint8_t value) {
91 auto it = std::ranges::find_if(SpecialChars,
92 [value](const TextElement& text_element) {
93 return text_element.ID == value;
94 });
95 if (it != SpecialChars.end()) {
96 return *it;
97 }
98 return std::nullopt;
99}
100
101ParsedElement FindMatchingElement(const std::string& str) {
102 std::smatch match;
103 std::vector<TextElement> commands_and_chars = TextCommands;
104 commands_and_chars.insert(commands_and_chars.end(), SpecialChars.begin(),
105 SpecialChars.end());
106 for (auto& text_element : commands_and_chars) {
107 match = text_element.MatchMe(str);
108 if (match.size() > 0) {
109 if (text_element.HasArgument) {
110 std::string arg = match[1].str().substr(1);
111 try {
112 return ParsedElement(text_element, std::stoi(arg, nullptr, 16));
113 } catch (const std::invalid_argument& e) {
114 util::logf("Error parsing argument for %s: %s",
115 text_element.GenericToken.c_str(), arg.c_str());
116 return ParsedElement(text_element, 0);
117 } catch (const std::out_of_range& e) {
118 util::logf("Argument out of range for %s: %s",
119 text_element.GenericToken.c_str(), arg.c_str());
120 return ParsedElement(text_element, 0);
121 }
122 } else {
123 return ParsedElement(text_element, 0);
124 }
125 }
126 }
127
128 const auto dictionary_element =
129 TextElement(0x80, DICTIONARYTOKEN, true, "Dictionary");
130
131 match = dictionary_element.MatchMe(str);
132 if (match.size() > 0) {
133 try {
134 // match[1] captures ":XX" — strip the leading colon
135 std::string dict_arg = match[1].str().substr(1);
136 return ParsedElement(dictionary_element,
137 DICTOFF + std::stoi(dict_arg, nullptr, 16));
138 } catch (const std::exception& e) {
139 util::logf("Error parsing dictionary token: %s", match[1].str().c_str());
140 return ParsedElement();
141 }
142 }
143 return ParsedElement();
144}
145
146std::string ParseTextDataByte(uint8_t value) {
147 if (CharEncoder.contains(value)) {
148 char c = CharEncoder.at(value);
149 std::string str = "";
150 str.push_back(c);
151 return str;
152 }
153
154 // Check for command.
155 if (auto text_element = FindMatchingCommand(value);
156 text_element != std::nullopt) {
157 return text_element->GenericToken;
158 }
159
160 // Check for special characters.
161 if (auto special_element = FindMatchingSpecial(value);
162 special_element != std::nullopt) {
163 return special_element->GenericToken;
164 }
165
166 // Check for dictionary.
167 int8_t dictionary = FindDictionaryEntry(value);
168 if (dictionary >= 0) {
169 return absl::StrFormat("[%s:%02X]", DICTIONARYTOKEN,
170 static_cast<unsigned char>(dictionary));
171 }
172
173 return "";
174}
175
176std::vector<uint8_t> ParseMessageToData(std::string str) {
177 std::vector<uint8_t> bytes;
178 std::string temp_string = std::move(str);
179 int pos = 0;
180 while (pos < temp_string.size()) {
181 // Get next text fragment.
182 if (temp_string[pos] == '[') {
183 int next = temp_string.find(']', pos);
184 if (next == -1) {
185 break;
186 }
187
188 ParsedElement parsedElement =
189 FindMatchingElement(temp_string.substr(pos, next - pos + 1));
190
191 const auto dictionary_element =
192 TextElement(0x80, DICTIONARYTOKEN, true, "Dictionary");
193
194 if (!parsedElement.Active) {
195 util::logf("Error parsing message: %s", temp_string);
196 break;
197 } else if (parsedElement.Parent == dictionary_element) {
198 bytes.push_back(parsedElement.Value);
199 } else {
200 bytes.push_back(parsedElement.Parent.ID);
201
202 if (parsedElement.Parent.HasArgument) {
203 bytes.push_back(parsedElement.Value);
204 }
205 }
206
207 pos = next + 1;
208 continue;
209 } else {
210 uint8_t bb = FindMatchingCharacter(temp_string[pos++]);
211
212 if (bb != 0xFF) {
213 bytes.push_back(bb);
214 }
215 }
216 }
217
218 return bytes;
219}
220
222 MessageParseResult result;
223 std::string temp_string(str);
224 size_t pos = 0;
225 bool warned_newline = false;
226
227 while (pos < temp_string.size()) {
228 char current = temp_string[pos];
229 if (current == '\r' || current == '\n') {
230 if (!warned_newline) {
231 result.warnings.push_back(
232 "Literal newlines are ignored; use [1], [2], [3], [V], or [K] "
233 "tokens for line breaks.");
234 warned_newline = true;
235 }
236 pos++;
237 continue;
238 }
239
240 if (current == '[') {
241 size_t close = temp_string.find(']', pos);
242 if (close == std::string::npos) {
243 result.errors.push_back(
244 absl::StrFormat("Unclosed token starting at position %zu", pos));
245 break;
246 }
247
248 std::string token = temp_string.substr(pos, close - pos + 1);
249 ParsedElement parsed_element = FindMatchingElement(token);
250 const auto dictionary_element =
251 TextElement(0x80, DICTIONARYTOKEN, true, "Dictionary");
252
253 if (!parsed_element.Active) {
254 result.errors.push_back(absl::StrFormat("Unknown token: %s", token));
255 pos = close + 1;
256 continue;
257 }
258
259 if (!parsed_element.Parent.HasArgument) {
260 if (token != parsed_element.Parent.GetParamToken()) {
261 result.errors.push_back(absl::StrFormat("Unknown token: %s", token));
262 pos = close + 1;
263 continue;
264 }
265 }
266
267 if (parsed_element.Parent == dictionary_element) {
268 result.bytes.push_back(parsed_element.Value);
269 } else {
270 result.bytes.push_back(parsed_element.Parent.ID);
271 if (parsed_element.Parent.HasArgument) {
272 result.bytes.push_back(parsed_element.Value);
273 }
274 }
275
276 pos = close + 1;
277 continue;
278 }
279
280 uint8_t bb = FindMatchingCharacter(current);
281 if (bb == 0xFF) {
282 result.errors.push_back(absl::StrFormat(
283 "Unsupported character '%c' at position %zu", current, pos));
284 pos++;
285 continue;
286 }
287
288 result.bytes.push_back(bb);
289 pos++;
290 }
291
292 return result;
293}
294
296 switch (bank) {
298 return "vanilla";
300 return "expanded";
301 }
302 return "vanilla";
303}
304
305absl::StatusOr<MessageBank> MessageBankFromString(std::string_view value) {
306 const std::string lowered = absl::AsciiStrToLower(std::string(value));
307 if (lowered == "vanilla") {
309 }
310 if (lowered == "expanded") {
312 }
313 return absl::InvalidArgumentError(
314 absl::StrFormat("Unknown message bank: %s", std::string(value)));
315}
316
317std::vector<DictionaryEntry> BuildDictionaryEntries(Rom* rom) {
318 std::vector<DictionaryEntry> AllDictionaries;
319 for (int i = 0; i < kNumDictionaryEntries; i++) {
320 std::vector<uint8_t> bytes;
321 std::stringstream stringBuilder;
322
323 int address = SnesToPc(
324 kTextData + (rom->data()[kPointersDictionaries + (i * 2) + 1] << 8) +
325 rom->data()[kPointersDictionaries + (i * 2)]);
326
327 int temppush_backress =
329 (rom->data()[kPointersDictionaries + ((i + 1) * 2) + 1] << 8) +
330 rom->data()[kPointersDictionaries + ((i + 1) * 2)]);
331
332 while (address < temppush_backress) {
333 uint8_t uint8_tDictionary = rom->data()[address++];
334 bytes.push_back(uint8_tDictionary);
335 stringBuilder << ParseTextDataByte(uint8_tDictionary);
336 }
337
338 AllDictionaries.push_back(DictionaryEntry{(uint8_t)i, stringBuilder.str()});
339 }
340
341 std::ranges::sort(AllDictionaries,
342 [](const DictionaryEntry& a, const DictionaryEntry& b) {
343 return a.Contents.size() > b.Contents.size();
344 });
345
346 return AllDictionaries;
347}
348
350 std::string str, const std::vector<DictionaryEntry>& dictionary) {
351 std::string temp = std::move(str);
352 for (const auto& entry : dictionary) {
353 if (entry.ContainedInString(temp)) {
354 temp = entry.ReplaceInstancesOfIn(temp);
355 }
356 }
357 return temp;
358}
359
360std::optional<size_t> FindTextMatch(std::string_view text,
361 std::string_view query, size_t start_pos,
362 bool case_sensitive,
363 bool match_whole_word) {
364 if (query.empty() || start_pos > text.size()) {
365 return std::nullopt;
366 }
367
368 std::string haystack_storage;
369 std::string query_storage;
370 std::string_view haystack = text;
371 std::string_view needle = query;
372 if (!case_sensitive) {
373 haystack_storage = LowercaseCopy(text);
374 query_storage = LowercaseCopy(query);
375 haystack = haystack_storage;
376 needle = query_storage;
377 }
378
379 size_t pos = haystack.find(needle, start_pos);
380 while (pos != std::string::npos) {
381 if (!match_whole_word || MatchesWholeWordAt(text, pos, query.size())) {
382 return pos;
383 }
384 pos = haystack.find(needle, pos + 1);
385 }
386
387 return std::nullopt;
388}
389
390int ReplaceTextMatches(std::string* text, std::string_view query,
391 std::string_view replacement, size_t start_pos,
392 bool replace_all, bool case_sensitive,
393 bool match_whole_word,
394 size_t* first_replaced_pos) {
395 if (!text || query.empty() || start_pos > text->size()) {
396 return 0;
397 }
398
399 int replacements = 0;
400 size_t cursor = start_pos;
401 while (true) {
402 const auto match_pos =
403 FindTextMatch(*text, query, cursor, case_sensitive, match_whole_word);
404 if (!match_pos.has_value()) {
405 break;
406 }
407
408 text->replace(*match_pos, query.size(), replacement);
409 if (replacements == 0 && first_replaced_pos != nullptr) {
410 *first_replaced_pos = *match_pos;
411 }
412 replacements++;
413
414 cursor = *match_pos + replacement.size();
415 if (!replace_all) {
416 break;
417 }
418
419 if (cursor > text->size()) {
420 break;
421 }
422 }
423
424 return replacements;
425}
426
428 uint8_t value, const std::vector<DictionaryEntry>& dictionary) {
429 for (const auto& entry : dictionary) {
430 if (entry.ID + DICTOFF == value) {
431 return entry;
432 }
433 }
434 return DictionaryEntry();
435}
436
437absl::StatusOr<MessageData> ParseSingleMessage(
438 const std::vector<uint8_t>& rom_data, int* current_pos) {
439 if (current_pos == nullptr) {
440 return absl::InvalidArgumentError("current_pos is null");
441 }
442 if (*current_pos < 0 ||
443 static_cast<size_t>(*current_pos) >= rom_data.size()) {
444 return absl::OutOfRangeError("current_pos is out of range");
445 }
446
447 MessageData message_data;
448 int pos = *current_pos;
449 uint8_t current_byte;
450 std::vector<uint8_t> temp_bytes_raw;
451 std::vector<uint8_t> temp_bytes_parsed;
452 std::string current_message_raw;
453 std::string current_message_parsed;
454
455 // Read the message data
456 while (pos < static_cast<int>(rom_data.size())) {
457 current_byte = rom_data[pos++];
458
459 if (current_byte == kMessageTerminator) {
460 message_data.ID = message_data.ID + 1;
461 message_data.Address = pos;
462 message_data.RawString = current_message_raw;
463 message_data.Data = temp_bytes_raw;
464 message_data.DataParsed = temp_bytes_parsed;
465 message_data.ContentsParsed = current_message_parsed;
466
467 temp_bytes_raw.clear();
468 temp_bytes_parsed.clear();
469 current_message_raw.clear();
470 current_message_parsed.clear();
471
472 *current_pos = pos;
473 return message_data;
474 } else if (current_byte == 0xFF) {
475 return absl::InvalidArgumentError("message terminator not found");
476 }
477
478 temp_bytes_raw.push_back(current_byte);
479
480 // Check for command.
481 auto text_element = FindMatchingCommand(current_byte);
482 if (text_element != std::nullopt) {
483 temp_bytes_parsed.push_back(current_byte);
484 if (text_element->HasArgument) {
485 if (pos >= static_cast<int>(rom_data.size())) {
486 return absl::OutOfRangeError("message command argument out of range");
487 }
488 uint8_t arg_byte = rom_data[pos++];
489 temp_bytes_raw.push_back(arg_byte);
490 temp_bytes_parsed.push_back(arg_byte);
491 current_message_raw.append(text_element->GetParamToken(arg_byte));
492 current_message_parsed.append(text_element->GetParamToken(arg_byte));
493 } else {
494 current_message_raw.append(text_element->GetParamToken());
495 current_message_parsed.append(text_element->GetParamToken());
496 }
497 continue;
498 }
499
500 // Check for special characters.
501 if (auto special_element = FindMatchingSpecial(current_byte);
502 special_element != std::nullopt) {
503 current_message_raw.append(special_element->GetParamToken());
504 current_message_parsed.append(special_element->GetParamToken());
505 temp_bytes_parsed.push_back(current_byte);
506 continue;
507 }
508
509 // Check for dictionary.
510 int8_t dictionary = FindDictionaryEntry(current_byte);
511 if (dictionary >= 0) {
512 std::string token = absl::StrFormat(
513 "[%s:%02X]", DICTIONARYTOKEN, static_cast<unsigned char>(dictionary));
514 current_message_raw.append(token);
515 current_message_parsed.append(token);
516 temp_bytes_parsed.push_back(current_byte);
517 continue;
518 }
519
520 // Everything else.
521 if (CharEncoder.contains(current_byte)) {
522 std::string str = "";
523 str.push_back(CharEncoder.at(current_byte));
524 current_message_raw.append(str);
525 current_message_parsed.append(str);
526 temp_bytes_parsed.push_back(current_byte);
527 }
528 }
529
530 *current_pos = pos;
531 return absl::InvalidArgumentError("message terminator not found");
532}
533
534std::vector<std::string> ParseMessageData(
535 std::vector<MessageData>& message_data,
536 const std::vector<DictionaryEntry>& dictionary_entries) {
537 std::vector<std::string> parsed_messages;
538
539 for (auto& message : message_data) {
540 std::string parsed_message = "";
541 // Use index-based loop to properly skip argument bytes
542 for (size_t pos = 0; pos < message.Data.size(); ++pos) {
543 uint8_t byte = message.Data[pos];
544
545 // Check for text commands first (they may have arguments to skip)
546 auto text_element = FindMatchingCommand(byte);
547 if (text_element != std::nullopt) {
548 // Add newline for certain commands
549 if (text_element->ID == kScrollVertical || text_element->ID == kLine2 ||
550 text_element->ID == kLine3) {
551 parsed_message.append("\n");
552 }
553 // If command has an argument, get it from next byte and skip it
554 if (text_element->HasArgument && pos + 1 < message.Data.size()) {
555 uint8_t arg_byte = message.Data[pos + 1];
556 parsed_message.append(text_element->GetParamToken(arg_byte));
557 pos++; // Skip the argument byte
558 } else {
559 parsed_message.append(text_element->GetParamToken());
560 }
561 continue; // Move to next byte
562 }
563
564 // Check for special characters
565 auto special_element = FindMatchingSpecial(byte);
566 if (special_element != std::nullopt) {
567 parsed_message.append(special_element->GetParamToken());
568 continue;
569 }
570
571 // Check for dictionary entries
572 if (byte >= DICTOFF && byte < (DICTOFF + 97)) {
573 DictionaryEntry dic_entry;
574 for (const auto& entry : dictionary_entries) {
575 if (entry.ID == byte - DICTOFF) {
576 dic_entry = entry;
577 break;
578 }
579 }
580 parsed_message.append(dic_entry.Contents);
581 continue;
582 }
583
584 // Finally check for regular characters
585 if (CharEncoder.contains(byte)) {
586 parsed_message.push_back(CharEncoder.at(byte));
587 }
588 }
589 parsed_messages.push_back(parsed_message);
590 }
591
592 return parsed_messages;
593}
594
595std::vector<MessageData> ReadAllTextData(uint8_t* rom, int pos, int max_pos) {
596 std::vector<MessageData> list_of_texts;
597 int message_id = 0;
598
599 if (!rom) {
600 return list_of_texts;
601 }
602 if (max_pos > 0 && (pos < 0 || pos >= max_pos)) {
603 return list_of_texts;
604 }
605
606 std::vector<uint8_t> raw_message;
607 std::vector<uint8_t> parsed_message;
608 std::string current_raw_message;
609 std::string current_parsed_message;
610
611 bool did_bank_switch = false;
612 uint8_t current_byte = 0;
613 while (current_byte != 0xFF) {
614 if (max_pos > 0 && (pos < 0 || pos >= max_pos))
615 break;
616 current_byte = rom[pos++];
617 if (current_byte == kMessageTerminator) {
618 list_of_texts.push_back(
619 MessageData(message_id++, pos, current_raw_message, raw_message,
620 current_parsed_message, parsed_message));
621 raw_message.clear();
622 parsed_message.clear();
623 current_raw_message.clear();
624 current_parsed_message.clear();
625 continue;
626 } else if (current_byte == 0xFF) {
627 break;
628 }
629
630 raw_message.push_back(current_byte);
631
632 auto text_element = FindMatchingCommand(current_byte);
633 if (text_element != std::nullopt) {
634 parsed_message.push_back(current_byte);
635 if (text_element->HasArgument) {
636 if (max_pos > 0 && (pos < 0 || pos >= max_pos))
637 break;
638 current_byte = rom[pos++];
639 raw_message.push_back(current_byte);
640 parsed_message.push_back(current_byte);
641 }
642
643 current_raw_message.append(text_element->GetParamToken(current_byte));
644 current_parsed_message.append(text_element->GetParamToken(current_byte));
645
646 if (text_element->Token == kBankToken && !did_bank_switch) {
647 did_bank_switch = true;
648 pos = kTextData2;
649 }
650
651 continue;
652 }
653
654 // Check for special characters.
655 auto special_element = FindMatchingSpecial(current_byte);
656 if (special_element != std::nullopt) {
657 current_raw_message.append(special_element->GetParamToken());
658 current_parsed_message.append(special_element->GetParamToken());
659 parsed_message.push_back(current_byte);
660 continue;
661 }
662
663 // Check for dictionary.
664 int8_t dictionary = FindDictionaryEntry(current_byte);
665 if (dictionary >= 0) {
666 current_raw_message.append(absl::StrFormat(
667 "[%s:%s]", DICTIONARYTOKEN,
668 util::HexByte(static_cast<unsigned char>(dictionary))));
669
670 // Safety: bounds-check dictionary pointer reads and dictionary expansion.
671 // This parser is used by tooling (RomDoctor) that may run on dummy or
672 // partially-initialized ROM buffers.
673 const int ptr_a = kPointersDictionaries + (dictionary * 2);
674 const int ptr_b = kPointersDictionaries + ((dictionary + 1) * 2);
675 if (max_pos > 0) {
676 if (ptr_a < 0 || ptr_a + 1 >= max_pos || ptr_b < 0 ||
677 ptr_b + 1 >= max_pos) {
678 continue;
679 }
680 }
681
682 uint32_t address =
683 Get24LocalFromPC(rom, kPointersDictionaries + (dictionary * 2));
684 uint32_t address_end =
685 Get24LocalFromPC(rom, kPointersDictionaries + ((dictionary + 1) * 2));
686
687 if (max_pos > 0) {
688 const uint32_t max_u = static_cast<uint32_t>(max_pos);
689 if (address >= max_u || address_end > max_u || address_end < address) {
690 continue;
691 }
692 }
693
694 for (uint32_t i = address; i < address_end; i++) {
695 if (max_pos > 0 && i >= static_cast<uint32_t>(max_pos))
696 break;
697 parsed_message.push_back(rom[i]);
698 current_parsed_message.append(ParseTextDataByte(rom[i]));
699 }
700
701 continue;
702 }
703
704 // Everything else.
705 if (CharEncoder.contains(current_byte)) {
706 std::string str = "";
707 str.push_back(CharEncoder.at(current_byte));
708 current_raw_message.append(str);
709 current_parsed_message.append(str);
710 parsed_message.push_back(current_byte);
711 }
712 }
713
714 return list_of_texts;
715}
716
717absl::Status LoadExpandedMessages(std::string& expanded_message_path,
718 std::vector<std::string>& parsed_messages,
719 std::vector<MessageData>& expanded_messages,
720 std::vector<DictionaryEntry>& dictionary) {
721 static Rom expanded_message_rom;
722 if (!expanded_message_rom.LoadFromFile(expanded_message_path).ok()) {
723 return absl::InternalError("Failed to load expanded message ROM");
724 }
725 expanded_messages = ReadAllTextData(expanded_message_rom.mutable_data(), 0);
726 auto parsed_expanded_messages =
727 ParseMessageData(expanded_messages, dictionary);
728 // Insert into parsed_messages
729 for (const auto& expanded_message : expanded_messages) {
730 parsed_messages.push_back(parsed_expanded_messages[expanded_message.ID]);
731 }
732 return absl::OkStatus();
733}
734
736 const std::vector<MessageData>& messages) {
737 nlohmann::json j = nlohmann::json::array();
738 for (const auto& msg : messages) {
739 j.push_back({{"id", msg.ID},
740 {"address", msg.Address},
741 {"raw_string", msg.RawString},
742 {"parsed_string", msg.ContentsParsed}});
743 }
744 return j;
745}
746
747absl::Status ExportMessagesToJson(const std::string& path,
748 const std::vector<MessageData>& messages) {
749 try {
750 nlohmann::json j = SerializeMessagesToJson(messages);
751 std::ofstream file(path);
752 if (!file.is_open()) {
753 return absl::InternalError(
754 absl::StrFormat("Failed to open file for writing: %s", path));
755 }
756 file << j.dump(2); // Pretty print with 2-space indent
757 return absl::OkStatus();
758 } catch (const std::exception& e) {
759 return absl::InternalError(
760 absl::StrFormat("JSON export failed: %s", e.what()));
761 }
762}
763
765 const std::vector<MessageData>& vanilla,
766 const std::vector<MessageData>& expanded) {
767 nlohmann::json j;
768 j["format"] = "yaze-message-bundle";
769 j["version"] = kMessageBundleVersion;
770 j["counts"] = {{"vanilla", vanilla.size()}, {"expanded", expanded.size()}};
771 j["messages"] = nlohmann::json::array();
772
773 auto append_messages = [&j](const std::vector<MessageData>& messages,
774 MessageBank bank) {
775 for (const auto& msg : messages) {
776 nlohmann::json entry;
777 entry["id"] = msg.ID;
778 entry["bank"] = MessageBankToString(bank);
779 entry["address"] = msg.Address;
780 entry["raw"] = msg.RawString;
781 entry["parsed"] = msg.ContentsParsed;
782 entry["text"] =
783 !msg.RawString.empty() ? msg.RawString : msg.ContentsParsed;
784 entry["length"] = msg.Data.size();
785 const std::string validation_text =
786 !msg.RawString.empty() ? msg.RawString : msg.ContentsParsed;
787 auto warnings = ValidateMessageLineWidths(validation_text);
788 if (!warnings.empty()) {
789 entry["line_width_warnings"] = warnings;
790 }
791 j["messages"].push_back(entry);
792 }
793 };
794
795 append_messages(vanilla, MessageBank::kVanilla);
796 append_messages(expanded, MessageBank::kExpanded);
797
798 return j;
799}
800
802 const std::string& path, const std::vector<MessageData>& vanilla,
803 const std::vector<MessageData>& expanded) {
804 try {
805 nlohmann::json j = SerializeMessageBundle(vanilla, expanded);
806 std::ofstream file(path);
807 if (!file.is_open()) {
808 return absl::InternalError(
809 absl::StrFormat("Failed to open file for writing: %s", path));
810 }
811 file << j.dump(2);
812 return absl::OkStatus();
813 } catch (const std::exception& e) {
814 return absl::InternalError(
815 absl::StrFormat("Message bundle export failed: %s", e.what()));
816 }
817}
818
819namespace {
820absl::StatusOr<MessageBundleEntry> ParseMessageBundleEntry(
821 const nlohmann::json& entry, MessageBank default_bank) {
822 if (!entry.is_object()) {
823 return absl::InvalidArgumentError("Message entry must be an object");
824 }
825
826 MessageBundleEntry result;
827 result.id = entry.value("id", -1);
828 if (result.id < 0) {
829 return absl::InvalidArgumentError("Message entry missing valid id");
830 }
831
832 if (entry.contains("bank")) {
833 if (!entry["bank"].is_string()) {
834 return absl::InvalidArgumentError("Message entry bank must be string");
835 }
836 auto bank_or = MessageBankFromString(entry["bank"].get<std::string>());
837 if (!bank_or.ok()) {
838 return bank_or.status();
839 }
840 result.bank = bank_or.value();
841 } else {
842 result.bank = default_bank;
843 }
844
845 if (entry.contains("raw") && entry["raw"].is_string()) {
846 result.raw = entry["raw"].get<std::string>();
847 } else if (entry.contains("raw_string") && entry["raw_string"].is_string()) {
848 result.raw = entry["raw_string"].get<std::string>();
849 }
850
851 if (entry.contains("parsed") && entry["parsed"].is_string()) {
852 result.parsed = entry["parsed"].get<std::string>();
853 } else if (entry.contains("parsed_string") &&
854 entry["parsed_string"].is_string()) {
855 result.parsed = entry["parsed_string"].get<std::string>();
856 }
857
858 if (entry.contains("text") && entry["text"].is_string()) {
859 result.text = entry["text"].get<std::string>();
860 }
861
862 if (result.text.empty()) {
863 if (!result.raw.empty()) {
864 result.text = result.raw;
865 } else if (!result.parsed.empty()) {
866 result.text = result.parsed;
867 }
868 }
869
870 if (result.text.empty()) {
871 return absl::InvalidArgumentError(
872 absl::StrFormat("Message entry %d missing text content", result.id));
873 }
874
875 return result;
876}
877} // namespace
878
879absl::StatusOr<std::vector<MessageBundleEntry>> ParseMessageBundleJson(
880 const nlohmann::json& json) {
881 std::vector<MessageBundleEntry> entries;
882
883 if (json.is_array()) {
884 for (const auto& entry : json) {
885 auto parsed_or = ParseMessageBundleEntry(entry, MessageBank::kVanilla);
886 if (!parsed_or.ok()) {
887 return parsed_or.status();
888 }
889 entries.push_back(parsed_or.value());
890 }
891 return entries;
892 }
893
894 if (!json.is_object()) {
895 return absl::InvalidArgumentError("Message bundle JSON must be object");
896 }
897
898 if (json.contains("version") && json["version"].is_number_integer()) {
899 int version = json["version"].get<int>();
900 if (version != kMessageBundleVersion) {
901 return absl::InvalidArgumentError(
902 absl::StrFormat("Unsupported message bundle version: %d", version));
903 }
904 }
905
906 if (!json.contains("messages") || !json["messages"].is_array()) {
907 return absl::InvalidArgumentError("Message bundle missing messages array");
908 }
909
910 for (const auto& entry : json["messages"]) {
911 auto parsed_or = ParseMessageBundleEntry(entry, MessageBank::kVanilla);
912 if (!parsed_or.ok()) {
913 return parsed_or.status();
914 }
915 entries.push_back(parsed_or.value());
916 }
917
918 return entries;
919}
920
921absl::StatusOr<std::vector<MessageBundleEntry>> LoadMessageBundleFromJson(
922 const std::string& path) {
923 std::ifstream file(path);
924 if (!file.is_open()) {
925 return absl::NotFoundError(
926 absl::StrFormat("Cannot open message bundle: %s", path));
927 }
928
929 nlohmann::json json;
930 try {
931 file >> json;
932 } catch (const std::exception& e) {
933 return absl::InvalidArgumentError(
934 absl::StrFormat("Failed to parse JSON: %s", e.what()));
935 }
936
937 return ParseMessageBundleJson(json);
938}
939
940// ===========================================================================
941// Line Width Validation
942// ===========================================================================
943
944std::vector<std::string> ValidateMessageLineWidths(const std::string& message) {
945 std::vector<std::string> warnings;
946
947 // Split message into lines on line-break tokens: [1], [2], [3], [V], [K]
948 // We walk through the string, counting visible characters per line.
949 int line_num = 1;
950 int visible_chars = 0;
951 bool all_spaces_this_line = true;
952 size_t pos = 0;
953
954 while (pos < message.size()) {
955 if (message[pos] == '[') {
956 // Find the closing bracket
957 size_t close = message.find(']', pos);
958 if (close == std::string::npos)
959 break;
960
961 std::string token = message.substr(pos, close - pos + 1);
962 pos = close + 1;
963
964 // Check if this token is a line-breaking command
965 // Line breaks: [1], [2], [3], [V], [K]
966 if (token == "[1]" || token == "[2]" || token == "[3]" ||
967 token == "[V]" || token == "[K]") {
968 // Check current line width before breaking.
969 // Exempt whitespace-only lines (used as screen clears in ALTTP).
970 if (visible_chars > kMaxLineWidth && !all_spaces_this_line) {
971 warnings.push_back(
972 absl::StrFormat("Line %d: %d visible characters (max %d)",
973 line_num, visible_chars, kMaxLineWidth));
974 }
975 line_num++;
976 visible_chars = 0;
977 all_spaces_this_line = true;
978 }
979 // Other command tokens ([W:02], [S:03], [SFX:2D], [L], [...], etc.)
980 // are not counted as visible characters - they're control codes or
981 // expand to game-rendered content that we can't measure in chars.
982 // Exception: [L] expands to player name but width varies (1-6 chars).
983 // For simplicity, we don't count command tokens.
984 continue;
985 }
986
987 // Regular visible character
988 if (message[pos] != ' ')
989 all_spaces_this_line = false;
990 visible_chars++;
991 pos++;
992 }
993
994 // Check the last line (exempt whitespace-only lines)
995 if (visible_chars > kMaxLineWidth && !all_spaces_this_line) {
996 warnings.push_back(
997 absl::StrFormat("Line %d: %d visible characters (max %d)", line_num,
998 visible_chars, kMaxLineWidth));
999 }
1000
1001 return warnings;
1002}
1003
1004// ===========================================================================
1005// Org Format (.org) Import/Export
1006// ===========================================================================
1007
// Parses an org-mode message header of the form "** XX - Label Text", where
// XX is a hexadecimal message ID.
//
// Returns {id, label} on success, where the label is everything after the
// first " - " separator. Returns std::nullopt when the line does not start
// with "** ", has no " - " separator, or when the text between them is not
// entirely a non-negative hexadecimal number (surrounding spaces/tabs are
// tolerated).
std::optional<std::pair<int, std::string>> ParseOrgHeader(
    const std::string& line) {
  if (line.size() < 6 || line[0] != '*' || line[1] != '*' || line[2] != ' ') {
    return std::nullopt;
  }

  // Find the " - " separator
  const size_t sep = line.find(" - ", 3);
  if (sep == std::string::npos) {
    return std::nullopt;
  }

  // Parse hex ID between "** " and " - "
  const std::string hex_id = line.substr(3, sep - 3);
  int message_id = 0;
  size_t consumed = 0;
  try {
    message_id = std::stoi(hex_id, &consumed, 16);
  } catch (const std::exception&) {
    return std::nullopt;
  }

  // std::stoi stops at the first non-hex character, so a heading such as
  // "** Beginning - Intro" would otherwise be read as message 0xBE. Require
  // the whole field (apart from trailing blanks) to be the number.
  while (consumed < hex_id.size() &&
         (hex_id[consumed] == ' ' || hex_id[consumed] == '\t')) {
    ++consumed;
  }
  if (consumed != hex_id.size() || message_id < 0) {
    return std::nullopt;
  }

  // Extract label after " - "
  std::string label = line.substr(sep + 3);

  return std::make_pair(message_id, label);
}
1036
1037std::vector<std::pair<int, std::string>> ParseOrgContent(
1038 const std::string& content) {
1039 std::vector<std::pair<int, std::string>> messages;
1040 std::istringstream stream(content);
1041 std::string line;
1042
1043 int current_id = -1;
1044 std::string current_body;
1045
1046 while (std::getline(stream, line)) {
1047 // Check if this is a header line
1048 auto header = ParseOrgHeader(line);
1049 if (header.has_value()) {
1050 // Save previous message if any
1051 if (current_id >= 0) {
1052 // Trim trailing newline from body
1053 while (!current_body.empty() && current_body.back() == '\n') {
1054 current_body.pop_back();
1055 }
1056 messages.push_back({current_id, current_body});
1057 }
1058
1059 current_id = header->first;
1060 current_body.clear();
1061 continue;
1062 }
1063
1064 // Skip top-level org headers (single *)
1065 if (!line.empty() && line[0] == '*' &&
1066 (line.size() < 2 || line[1] != '*')) {
1067 continue;
1068 }
1069
1070 // Accumulate body text
1071 if (current_id >= 0) {
1072 if (!current_body.empty()) {
1073 current_body += "\n";
1074 }
1075 current_body += line;
1076 }
1077 }
1078
1079 // Save last message
1080 if (current_id >= 0) {
1081 while (!current_body.empty() && current_body.back() == '\n') {
1082 current_body.pop_back();
1083 }
1084 messages.push_back({current_id, current_body});
1085 }
1086
1087 return messages;
1088}
1089
1091 const std::vector<std::pair<int, std::string>>& messages,
1092 const std::vector<std::string>& labels) {
1093 std::string output;
1094 output += "* Oracle of Secrets English Dialogue\n";
1095
1096 for (size_t i = 0; i < messages.size(); ++i) {
1097 const auto& [msg_id, body] = messages[i];
1098 std::string label = (i < labels.size())
1099 ? labels[i]
1100 : absl::StrFormat("Message %02X", msg_id);
1101
1102 output += absl::StrFormat("** %02X - %s\n", msg_id, label);
1103 output += body;
1104 output += "\n\n";
1105 }
1106
1107 return output;
1108}
1109
1110// ===========================================================================
1111// Expanded Message Bank
1112// ===========================================================================
1113
1114std::vector<MessageData> ReadExpandedTextData(uint8_t* rom, int pos) {
1115 // Reuse ReadAllTextData — it already handles 0x7F terminators and 0xFF end
1116 return ReadAllTextData(rom, pos);
1117}
1118
// Writes `messages` into the expanded message region [start, end] of `rom`
// (`end` is inclusive).
//
// Each message is encoded with ParseMessageToData and followed by
// kMessageTerminator; a single 0xFF is appended after the last message. The
// whole region image is built in memory first and handed to rom->WriteVector
// in one call.
//
// Returns:
//   InvalidArgumentError   - `rom` is null or not loaded, or the region is
//                            invalid (start < 0, end < start, no capacity).
//   OutOfRangeError        - `end` lies at or beyond the size of the ROM data.
//   ResourceExhaustedError - the encoded messages plus the trailing 0xFF do
//                            not fit into the region.
//   Otherwise the status returned by the fence setup or by rom->WriteVector.
1119absl::Status WriteExpandedTextData(Rom* rom, int start, int end,
1120 const std::vector<std::string>& messages) {
1121 if (rom == nullptr || !rom->is_loaded()) {
1122 return absl::InvalidArgumentError("ROM not loaded");
1123 }
1124 if (start < 0 || end < start) {
1125 return absl::InvalidArgumentError("Invalid expanded message region");
1126 }
1127
 // Number of bytes in the inclusive range [start, end].
1128 const int capacity = end - start + 1;
1129 if (capacity <= 0) {
1130 return absl::InvalidArgumentError(
1131 "Expanded message region has no capacity");
1132 }
1133
1134 const auto& data = rom->vector();
1135 if (end >= static_cast<int>(data.size())) {
1136 return absl::OutOfRangeError("Expanded message region out of ROM range");
1137 }
1138
1139 // Serialize into a contiguous buffer, then do a single ROM write for safety
1140 // and determinism (and to honor write fences).
1141 std::vector<uint8_t> blob;
1142 blob.reserve(static_cast<size_t>(capacity));
1143
 // `used` counts the bytes already placed in `blob`.
1144 int used = 0;
1145 for (size_t i = 0; i < messages.size(); ++i) {
1146 auto bytes = ParseMessageToData(messages[i]);
1147 const int needed = static_cast<int>(bytes.size()) + 1; // +0x7F
1148
1149 // Always reserve space for the final 0xFF.
1150 if (used + needed + 1 > capacity) {
1151 return absl::ResourceExhaustedError(absl::StrFormat(
1152 "Expanded message data exceeds bank boundary "
1153 "(at message %d, used=%d, needed=%d, capacity=%d, end=0x%06X)",
1154 static_cast<int>(i), used, needed, capacity, end));
1155 }
1156
1157 blob.insert(blob.end(), bytes.begin(), bytes.end());
1158 blob.push_back(kMessageTerminator);
1159 used += needed;
1160 }
1161
 // Still needed when `messages` is empty: the loop above never ran, so the
 // room for the end marker has not been checked yet.
1162 if (used + 1 > capacity) {
1163 return absl::ResourceExhaustedError(
1164 "No space for end-of-region marker (0xFF)");
1165 }
1166 blob.push_back(0xFF);
1167
1168 // ROM safety: this writer must only touch the expanded message region.
1169 // NOTE: `end` is inclusive; convert to half-open for the fence.
 // NOTE(review): the declaration of `fence` is not present in this listing
 // (listing line 1170 is missing); confirm its type against the real source.
1171 const uint32_t fence_start = static_cast<uint32_t>(start);
1172 const uint32_t fence_end =
1173 static_cast<uint32_t>(static_cast<uint64_t>(end) + 1ULL);
1174 RETURN_IF_ERROR(fence.Allow(fence_start, fence_end, "ExpandedMessageBank"));
1175 yaze::rom::ScopedWriteFence scope(rom, &fence);
1176
1177 return rom->WriteVector(start, std::move(blob));
1178}
1179
1180absl::Status WriteExpandedTextData(uint8_t* rom, int start, int end,
1181 const std::vector<std::string>& messages) {
1182 int pos = start;
1183 int capacity = end - start + 1;
1184
1185 for (size_t i = 0; i < messages.size(); ++i) {
1186 auto bytes = ParseMessageToData(messages[i]);
1187
1188 // Check space: bytes + terminator (0x7F) + final end marker (0xFF)
1189 int needed = static_cast<int>(bytes.size()) + 1; // +1 for 0x7F
1190 if (i == messages.size() - 1) {
1191 needed += 1; // +1 for final 0xFF
1192 }
1193
1194 if (pos + needed - start > capacity) {
1195 return absl::ResourceExhaustedError(
1196 absl::StrFormat("Expanded message data exceeds bank boundary "
1197 "(at message %d, pos 0x%06X, end 0x%06X)",
1198 static_cast<int>(i), pos, end));
1199 }
1200
1201 // Write encoded bytes
1202 for (uint8_t byte : bytes) {
1203 rom[pos++] = byte;
1204 }
1205 // Write message terminator
1206 rom[pos++] = kMessageTerminator;
1207 }
1208
1209 // Write end-of-region marker
1210 if (pos - start >= capacity) {
1211 return absl::ResourceExhaustedError(
1212 "No space for end-of-region marker (0xFF)");
1213 }
1214 rom[pos++] = 0xFF;
1215
1216 return absl::OkStatus();
1217}
1218
// Writes the raw bytes (`Data`) of every message to the ROM, starting at
// kTextData, and finishes with a 0xFF byte.
//
// When a byte equal to kBankSwitchCommand is written, the write position
// moves so that the next byte lands at kTextData2.
//
// Returns InvalidArgumentError if `rom` is null or not loaded,
// ResourceExhaustedError if the write position has run past kTextDataEnd
// (before a bank switch) or kTextData2End (after one), or any error returned
// by rom->WriteByte.
//
// NOTE(review): the range checks run after the bytes in question have been
// passed to rom->WriteByte, not before; confirm that this is intended.
1219absl::Status WriteAllTextData(Rom* rom,
1220 const std::vector<MessageData>& messages) {
1221 if (rom == nullptr || !rom->is_loaded()) {
1222 return absl::InvalidArgumentError("ROM not loaded");
1223 }
1224
1225 int pos = kTextData;
1226 bool in_second_bank = false;
1227
1228 for (const auto& message : messages) {
1229 for (uint8_t value : message.Data) {
1230 RETURN_IF_ERROR(rom->WriteByte(pos, value));
1231
1232 if (value == kBankSwitchCommand) {
1233 if (!in_second_bank && pos > kTextDataEnd) {
1234 return absl::ResourceExhaustedError(absl::StrFormat(
1235 "Text data exceeds first bank (pos 0x%06X)", pos));
1236 }
 // One below kTextData2 so that the pos++ that follows lands exactly on
 // kTextData2.
1237 pos = kTextData2 - 1;
1238 in_second_bank = true;
1239 }
1240
1241 pos++;
1242 }
1243
 // NOTE(review): listing line 1244 is missing here; whatever runs once per
 // message after its data bytes is not visible in this listing.
1245 }
1246
1247 if (!in_second_bank && pos > kTextDataEnd) {
1248 return absl::ResourceExhaustedError(
1249 absl::StrFormat("Text data exceeds first bank (pos 0x%06X)", pos));
1250 }
1251
1252 if (in_second_bank && pos > kTextData2End) {
1253 return absl::ResourceExhaustedError(
1254 absl::StrFormat("Text data exceeds second bank (pos 0x%06X)", pos));
1255 }
1256
 // End-of-data marker.
1257 RETURN_IF_ERROR(rom->WriteByte(pos, 0xFF));
1258 return absl::OkStatus();
1259}
1260
1261} // namespace editor
1262} // namespace yaze
The Rom class is used to load, save, and modify Rom data. This is a generic SNES ROM container and do...
Definition rom.h:28
absl::Status LoadFromFile(const std::string &filename, const LoadOptions &options=LoadOptions::Defaults())
Definition rom.cc:155
absl::Status WriteByte(int addr, uint8_t value)
Definition rom.cc:476
auto mutable_data()
Definition rom.h:140
const auto & vector() const
Definition rom.h:143
absl::Status WriteVector(int addr, std::vector< uint8_t > data)
Definition rom.cc:548
auto data() const
Definition rom.h:139
bool is_loaded() const
Definition rom.h:132
static RomSettings & Get()
uint32_t GetAddressOr(const std::string &key, uint32_t default_value) const
absl::Status Allow(uint32_t start, uint32_t end, std::string_view label)
Definition write_fence.h:32
constexpr char kExpandedMessageEnd[]
constexpr char kExpandedMessageStart[]
bool MatchesWholeWordAt(std::string_view text, size_t pos, size_t len)
absl::StatusOr< MessageBundleEntry > ParseMessageBundleEntry(const nlohmann::json &entry, MessageBank default_bank)
uint8_t FindMatchingCharacter(char value)
const std::string kBankToken
nlohmann::json SerializeMessagesToJson(const std::vector< MessageData > &messages)
absl::StatusOr< MessageBank > MessageBankFromString(std::string_view value)
DictionaryEntry FindRealDictionaryEntry(uint8_t value, const std::vector< DictionaryEntry > &dictionary)
constexpr int kMaxLineWidth
int GetExpandedTextDataStart()
constexpr int kMessageBundleVersion
const std::string DICTIONARYTOKEN
constexpr uint8_t kScrollVertical
std::string ParseTextDataByte(uint8_t value)
absl::Status WriteAllTextData(Rom *rom, const std::vector< MessageData > &messages)
absl::Status LoadExpandedMessages(std::string &expanded_message_path, std::vector< std::string > &parsed_messages, std::vector< MessageData > &expanded_messages, std::vector< DictionaryEntry > &dictionary)
constexpr int kTextData
std::optional< std::pair< int, std::string > > ParseOrgHeader(const std::string &line)
std::string MessageBankToString(MessageBank bank)
constexpr int kExpandedTextDataEndDefault
constexpr int kTextData2
std::string ReplaceAllDictionaryWords(std::string str, const std::vector< DictionaryEntry > &dictionary)
absl::Status WriteExpandedTextData(Rom *rom, int start, int end, const std::vector< std::string > &messages)
nlohmann::json SerializeMessageBundle(const std::vector< MessageData > &vanilla, const std::vector< MessageData > &expanded)
constexpr uint8_t kLine2
constexpr int kPointersDictionaries
absl::StatusOr< std::vector< MessageBundleEntry > > LoadMessageBundleFromJson(const std::string &path)
constexpr int kNumDictionaryEntries
absl::StatusOr< MessageData > ParseSingleMessage(const std::vector< uint8_t > &rom_data, int *current_pos)
absl::StatusOr< std::vector< MessageBundleEntry > > ParseMessageBundleJson(const nlohmann::json &json)
std::vector< std::string > ParseMessageData(std::vector< MessageData > &message_data, const std::vector< DictionaryEntry > &dictionary_entries)
std::optional< TextElement > FindMatchingSpecial(uint8_t value)
constexpr uint8_t kMessageTerminator
std::vector< MessageData > ReadAllTextData(uint8_t *rom, int pos, int max_pos)
constexpr int kTextData2End
std::vector< DictionaryEntry > BuildDictionaryEntries(Rom *rom)
constexpr uint8_t kBankSwitchCommand
int ReplaceTextMatches(std::string *text, std::string_view query, std::string_view replacement, size_t start_pos, bool replace_all, bool case_sensitive, bool match_whole_word, size_t *first_replaced_pos)
std::optional< size_t > FindTextMatch(std::string_view text, std::string_view query, size_t start_pos, bool case_sensitive, bool match_whole_word)
std::vector< uint8_t > ParseMessageToData(std::string str)
absl::Status ExportMessagesToJson(const std::string &path, const std::vector< MessageData > &messages)
absl::Status ExportMessageBundleToJson(const std::string &path, const std::vector< MessageData > &vanilla, const std::vector< MessageData > &expanded)
constexpr uint8_t DICTOFF
std::string ExportToOrgFormat(const std::vector< std::pair< int, std::string > > &messages, const std::vector< std::string > &labels)
std::vector< MessageData > ReadExpandedTextData(uint8_t *rom, int pos)
std::optional< TextElement > FindMatchingCommand(uint8_t b)
MessageParseResult ParseMessageToDataWithDiagnostics(std::string_view str)
int GetExpandedTextDataEnd()
ParsedElement FindMatchingElement(const std::string &str)
std::vector< std::string > ValidateMessageLineWidths(const std::string &message)
std::vector< std::pair< int, std::string > > ParseOrgContent(const std::string &content)
constexpr int kExpandedTextDataDefault
constexpr uint8_t kLine3
int8_t FindDictionaryEntry(uint8_t value)
constexpr int kTextDataEnd
std::string HexByte(uint8_t byte, HexStringParams params)
Definition hex.cc:30
void logf(const absl::FormatSpec< Args... > &format, Args &&... args)
Definition log.h:115
uint32_t Get24LocalFromPC(uint8_t *data, int addr, bool pc=true)
Definition snes.h:30
uint32_t SnesToPc(uint32_t addr) noexcept
Definition snes.h:8
#define RETURN_IF_ERROR(expr)
Definition snes.cc:22
std::vector< uint8_t > Data
std::vector< uint8_t > DataParsed
std::vector< uint8_t > bytes
std::vector< std::string > errors
std::vector< std::string > warnings
std::string GetParamToken(uint8_t value=0) const