// Matches a string that consists entirely of characters outside the hiragana
// (U+3040–U+309F) and katakana (U+30A0–U+30FF) ranges.
// Deliberately neither global nor multiline: without `m` the anchors span the
// whole string (so "かな\n漢字" is not reported as non-kana), and without `g`
// the regex carries no `lastIndex` state between calls.
const nonKanaRegex = new RegExp(
    "^[^\u3040-\u309f\u30a0-\u30ff]+$",
    "u",
);

// Splits text into consecutive runs. The alternatives are tried in order:
//   1. a run of characters outside the hiragana/katakana ranges
//      (U+3040–U+309F, U+30A0–U+30FF);
//   2. a run that starts on a kana character and may continue through kana,
//      U+3000–U+303F and U+FF00–U+FFEF characters.
// Because (1) is tried first, (2) can only begin at a kana character.
const tokenizeRegex = new RegExp(
    [
        "[^\u3040-\u309f\u30a0-\u30ff]+",
        "[\u3000-\u303f\u3040-\u309f\u30a0-\u30ff\uff00-\uffef]+",
    ].join("|"),
    "gmu",
);

/**
 * Tells whether `char` is made up solely of non-kana characters, as decided
 * by `nonKanaRegex`.
 *
 * @param {string | null | undefined} char - Text to classify.
 * @returns {boolean} `true` when the regex matches; `false` otherwise,
 *   including when `char` is `null` or `undefined`.
 */
function isNonKana(char) {
    // Guard explicitly: `undefined !== null` is true, so relying on optional
    // chaining alone would classify a missing value as non-kana.
    if (char == null) {
        return false;
    }
    // `match` (unlike `test`/`exec`) is safe even if the regex is global,
    // because it does not depend on a previous call's `lastIndex`.
    return char.match(nonKanaRegex) !== null;
}

/**
 * Pairs each segment of a written Japanese string with the part of its
 * reading that spells it, for rendering as `<ruby>`/`<rt>`.
 *
 * `kanji` is split with `tokenizeRegex` into alternating non-kana and kana
 * tokens. A kana token is its own reading. A non-kana token receives
 * whatever part of the reading precedes the next kana token, or the whole
 * remainder when it is the last token.
 *
 * The match is greedy-free and heuristic: for a non-kana token the earliest
 * occurrence of the following kana token that leaves a non-empty reading is
 * used, so genuinely ambiguous inputs may still be split differently from
 * what a human would choose.
 *
 * @param {string} kanji - The written form (may mix kanji, kana and other text).
 * @param {string} reading - The full reading of `kanji`.
 * @returns {Array<{ruby: string, rt: string}>} One entry per token, in order.
 * @throws {Error} If `kanji` yields no tokens, or a kana token of `kanji`
 *   cannot be found in the remaining reading.
 */
export default function pairFurigana(kanji, reading) {
    // With a global regex, `match` returns null (not an empty array) when
    // nothing matches, e.g. for an empty string.
    const tokens = kanji.match(tokenizeRegex) ?? [];
    if (tokens.length === 0) {
        throw new Error("No tokens found");
    }

    let remainingReading = reading;
    const result = [];

    for (const [index, currentToken] of tokens.entries()) {
        const nextToken = tokens[index + 1];

        if (!isNonKana(currentToken)) {
            // Kana token: it reads as itself. Locate it in the reading and
            // consume everything up to and including it, so a stray prefix
            // in the reading cannot shift the following tokens.
            const tokenStart = remainingReading.indexOf(currentToken);
            if (tokenStart === -1) {
                throw new Error(`Token ${currentToken} not found in reading`);
            }
            result.push({
                ruby: currentToken,
                rt: currentToken,
            });
            remainingReading = remainingReading.slice(tokenStart + currentToken.length);
        } else if (nextToken === undefined) {
            // Last token and non-kana: it takes whatever reading is left.
            result.push({
                ruby: currentToken,
                rt: remainingReading,
            });
        } else {
            // Non-kana token followed by a kana token (tokens alternate,
            // otherwise they would have been merged). Its reading is the text
            // before that kana token. Search from index 1 first so the token
            // gets a non-empty reading when the reading itself starts with the
            // same kana (e.g. 聞き / きき); fall back to index 0 for tokens
            // that legitimately contribute nothing to the reading.
            let nextTokenStart = remainingReading.indexOf(nextToken, 1);
            if (nextTokenStart === -1) {
                nextTokenStart = remainingReading.indexOf(nextToken);
            }
            if (nextTokenStart === -1) {
                throw new Error(`Token ${nextToken} not found in reading`);
            }
            result.push({
                ruby: currentToken,
                rt: remainingReading.slice(0, nextTokenStart),
            });
            // Consume the reading up to (not including) the next kana token.
            remainingReading = remainingReading.slice(nextTokenStart);
        }
    }

    return result;
}
