2024-04-18 22:09:26 -04:00
|
|
|
// SPDX-FileCopyrightText: 2024 Joshua Goins <josh@redstrate.com>
|
|
|
|
// SPDX-License-Identifier: GPL-3.0-or-later
|
|
|
|
|
|
|
|
use std::collections::HashMap;
|
|
|
|
use std::io::{Cursor, Seek, SeekFrom};
|
|
|
|
|
|
|
|
use crate::ByteSpan;
|
2024-04-20 13:18:03 -04:00
|
|
|
use binrw::binrw;
|
|
|
|
use binrw::{BinRead, BinReaderExt};
|
2024-04-18 22:09:26 -04:00
|
|
|
|
|
|
|
// Based off of https://github.com/Lotlab/ffxiv-vulgar-words-reader/
|
|
|
|
// Credit goes to Jim Kirisame for documenting this format
|
|
|
|
// TODO: double check I'm reading everything correctly
|
|
|
|
|
|
|
|
#[binrw]
|
|
|
|
#[derive(Debug)]
|
|
|
|
#[brw(little)]
|
|
|
|
pub struct EntryItem {
|
|
|
|
flag: u32,
|
|
|
|
sibling: u32,
|
|
|
|
child: u32,
|
2024-04-20 13:18:03 -04:00
|
|
|
offset: u32,
|
2024-04-18 22:09:26 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
#[binrw]
|
|
|
|
#[derive(Debug)]
|
|
|
|
#[brw(little)]
|
|
|
|
struct DictionaryHeader {
|
|
|
|
#[br(seek_before = SeekFrom::Start(0x8124))]
|
|
|
|
#[br(count = 256)]
|
|
|
|
chara_replace1: Vec<u16>,
|
|
|
|
|
|
|
|
#[br(count = 256)]
|
|
|
|
chara_replace2: Vec<u16>,
|
|
|
|
|
|
|
|
#[br(count = 256)]
|
|
|
|
chara_replace3: Vec<u16>,
|
|
|
|
|
|
|
|
#[br(count = 5)]
|
|
|
|
block_offsets: Vec<u32>,
|
|
|
|
|
|
|
|
#[br(count = 5)]
|
|
|
|
block_lengths: Vec<u32>,
|
|
|
|
|
|
|
|
#[br(pad_before = 4)]
|
|
|
|
#[br(count = 256)]
|
|
|
|
chara_block: Vec<u32>,
|
|
|
|
|
|
|
|
#[br(ignore)]
|
|
|
|
begin_node: Vec<u16>,
|
|
|
|
|
|
|
|
#[br(ignore)]
|
|
|
|
inner_node: Vec<u16>,
|
|
|
|
|
|
|
|
#[br(ignore)]
|
|
|
|
chara: Vec<u16>,
|
|
|
|
|
|
|
|
#[br(ignore)]
|
|
|
|
word: Vec<u16>,
|
|
|
|
|
|
|
|
#[br(ignore)]
|
|
|
|
entries: Vec<EntryItem>,
|
|
|
|
}
|
|
|
|
|
|
|
|
pub struct Dictionary {
|
|
|
|
header: DictionaryHeader,
|
2024-04-20 13:18:03 -04:00
|
|
|
pub words: Vec<String>,
|
2024-04-18 22:09:26 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
impl Dictionary {
|
|
|
|
/// Parses an existing dictionary file.
|
|
|
|
pub fn from_existing(buffer: ByteSpan) -> Option<Dictionary> {
|
|
|
|
let mut cursor = Cursor::new(buffer);
|
|
|
|
let mut dict = DictionaryHeader::read(&mut cursor).unwrap();
|
|
|
|
|
|
|
|
let map_start = 0x8750u32;
|
|
|
|
let map_size = 0x200u32;
|
|
|
|
|
|
|
|
// fix up offsets
|
|
|
|
for offset in &mut dict.block_offsets {
|
|
|
|
*offset = *offset + map_start + map_size;
|
|
|
|
}
|
|
|
|
|
|
|
|
for i in 0..dict.block_lengths[0] / 2 {
|
|
|
|
let offset = dict.block_offsets[0] + i * 2;
|
|
|
|
cursor.seek(SeekFrom::Start(offset as u64)).ok()?;
|
|
|
|
dict.begin_node.push(cursor.read_le::<u16>().ok()?);
|
|
|
|
}
|
|
|
|
|
|
|
|
for i in 0..dict.block_lengths[1] / 2 {
|
|
|
|
let offset = dict.block_offsets[1] + i * 2;
|
|
|
|
cursor.seek(SeekFrom::Start(offset as u64)).ok()?;
|
|
|
|
dict.inner_node.push(cursor.read_le::<u16>().ok()?);
|
|
|
|
}
|
|
|
|
|
|
|
|
for i in 0..dict.block_lengths[2] / 2 {
|
|
|
|
let offset = dict.block_offsets[2] + i * 2;
|
|
|
|
cursor.seek(SeekFrom::Start(offset as u64)).ok()?;
|
|
|
|
dict.chara.push(cursor.read_le::<u16>().ok()?);
|
|
|
|
}
|
|
|
|
|
|
|
|
for i in 0..dict.block_lengths[3] / 2 {
|
|
|
|
let offset = dict.block_offsets[3] + i * 2;
|
|
|
|
cursor.seek(SeekFrom::Start(offset as u64)).ok()?;
|
|
|
|
dict.word.push(cursor.read_le::<u16>().ok()?);
|
|
|
|
}
|
|
|
|
|
|
|
|
for i in 0..dict.block_lengths[4] / 16 {
|
|
|
|
let offset = dict.block_offsets[4] + i * 16;
|
|
|
|
cursor.seek(SeekFrom::Start(offset as u64)).ok()?;
|
|
|
|
dict.entries.push(cursor.read_le::<EntryItem>().ok()?);
|
|
|
|
}
|
|
|
|
|
|
|
|
let mut dict = Dictionary {
|
|
|
|
header: dict,
|
2024-04-20 13:18:03 -04:00
|
|
|
words: Vec::new(),
|
2024-04-18 22:09:26 -04:00
|
|
|
};
|
|
|
|
|
|
|
|
// TODO: lol
|
|
|
|
dict.words = dict.list_words()?;
|
|
|
|
|
|
|
|
Some(dict)
|
|
|
|
}
|
|
|
|
|
|
|
|
fn list_words(&self) -> Option<Vec<String>> {
|
|
|
|
let mut result = Vec::new();
|
|
|
|
let lut = self.generate_index_rune_lookup_table();
|
|
|
|
for (id, v) in self.header.begin_node.iter().enumerate() {
|
|
|
|
if *v == 0 {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
let chara = Dictionary::index_to_rune(&lut, id as u32);
|
|
|
|
self.dump_dict_node(&mut result, *v as i32, String::from(chara as u8 as char))
|
|
|
|
}
|
|
|
|
|
|
|
|
Some(result)
|
|
|
|
}
|
|
|
|
|
|
|
|
fn generate_index_rune_lookup_table(&self) -> HashMap<u16, u16> {
|
|
|
|
let mut map = HashMap::new();
|
|
|
|
for i in 0..self.header.chara_block.len() {
|
|
|
|
map.insert(self.header.chara_block[i] as u16, i as u16);
|
|
|
|
}
|
|
|
|
|
|
|
|
map
|
|
|
|
}
|
|
|
|
|
|
|
|
fn index_to_rune(lookup_table: &HashMap<u16, u16>, index: u32) -> i32 {
|
|
|
|
let higher = index >> 8;
|
|
|
|
let lower = index & 0xFF;
|
|
|
|
|
|
|
|
if higher == 0 {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2025-03-11 16:25:59 -04:00
|
|
|
if let Some(new_val) = lookup_table.get(&(higher as u16)) {
|
2024-04-18 22:09:26 -04:00
|
|
|
(((*new_val as u32) << 8) + lower) as i32
|
|
|
|
} else {
|
|
|
|
0
|
2025-03-11 16:25:59 -04:00
|
|
|
}
|
2024-04-18 22:09:26 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
fn dump_dict_node(&self, vec: &mut Vec<String>, entry_id: i32, prev: String) {
|
|
|
|
let node = &self.header.entries[entry_id as usize];
|
|
|
|
for i in 0..node.sibling {
|
|
|
|
let Some(current) = self.get_string(entry_id, i as i32) else {
|
|
|
|
return;
|
|
|
|
};
|
|
|
|
|
|
|
|
if node.child == 0 {
|
|
|
|
vec.push(prev.clone() + ¤t);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
let value = self.header.inner_node[(node.child + i) as usize];
|
|
|
|
if value == 0 {
|
|
|
|
vec.push(prev.clone() + ¤t);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
self.dump_dict_node(vec, value as i32, prev.clone() + ¤t);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn get_string(&self, entry_id: i32, sibling_id: i32) -> Option<String> {
|
|
|
|
if let Some(characters) = self.get_string_characters(entry_id, sibling_id) {
|
|
|
|
return String::from_utf16(&characters).ok();
|
|
|
|
}
|
|
|
|
|
|
|
|
None
|
|
|
|
}
|
|
|
|
|
|
|
|
fn get_string_characters(&self, entry_id: i32, sibling_id: i32) -> Option<Vec<u16>> {
|
|
|
|
if entry_id as usize >= self.header.entries.len() {
|
|
|
|
return None;
|
|
|
|
}
|
|
|
|
|
|
|
|
let entry = self.header.entries.get(entry_id as usize)?;
|
|
|
|
|
|
|
|
if entry.flag == 0 {
|
|
|
|
let pos = (entry.offset / 2) as i32 + sibling_id;
|
|
|
|
if pos as usize > self.header.chara.len() {
|
|
|
|
return None;
|
|
|
|
}
|
|
|
|
|
|
|
|
if self.header.chara[pos as usize] == 0 {
|
|
|
|
return None;
|
|
|
|
}
|
|
|
|
|
|
|
|
return Some(vec![self.header.chara[pos as usize]]);
|
|
|
|
}
|
|
|
|
|
|
|
|
let begin = entry.offset / 2;
|
|
|
|
let mut end = begin + 1;
|
|
|
|
|
|
|
|
while (end as usize) < self.header.word.len() && self.header.word[end as usize] != 0 {
|
|
|
|
end += 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
Some(self.header.word[begin as usize..end as usize].to_vec())
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#[cfg(test)]
|
|
|
|
mod tests {
|
|
|
|
use std::fs::read;
|
|
|
|
use std::path::PathBuf;
|
|
|
|
|
|
|
|
use super::*;
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
fn test_invalid() {
|
|
|
|
let mut d = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
|
|
|
|
d.push("resources/tests");
|
|
|
|
d.push("random");
|
|
|
|
|
|
|
|
// Feeding it invalid data should not panic
|
|
|
|
Dictionary::from_existing(&read(d).unwrap());
|
|
|
|
}
|
2024-04-20 13:18:03 -04:00
|
|
|
}
|