// This contains logic for trying to guess a user's "given name" from
// their full name.
// See https://github.com/morebetterlabs/ruzuku-v2/issues/2577

// This is required due to the unusual regex patterns for language types below.
/* eslint-disable no-misleading-character-class */

// Decide whether a word is an honorific or title prefix (eg: Dr., Mrs., Prof., etc.)
// The comparison is case-insensitive and ignores the first '.' found in the word.
// see: https://en.wikipedia.org/wiki/English_honorifics
const isHonorificPrefix = (name: string) => {
  // Lower-case, dot-free spellings of the prefixes we recognise.
  const knownPrefixes = [
    'the', 'mr', 'ms', 'miss', 'mrs',
    'dr', 'sir', 'prof', 'br', 'sr',
    'fr', 'rev', 'reverend', 'col', 'capt',
    'gen', 'pr', 'lt', 'maj', 'sgt',
  ];

  // Normalise the candidate the same way the list is spelled.
  const candidate = name.toLowerCase().replace('.', '');

  return knownPrefixes.some((prefix) => prefix === candidate);
};

// Matches a string that is exactly one character long, where that character is
// either any Unicode letter (\p{Letter}) or falls inside one of the raw ranges
// listed below (Latin-like ranges plus the Cyrillic blocks).
// NOTE(review): the raw ranges are wider than "letters" — they also admit a literal
// hyphen and code points such as '{', '|', '}' and '~'. Confirm whether that is intended.
const isAlphabeticRegex =
  /^[\p{Letter}a-åa-ö-w-я\u0400-\u0484\u0487-\u052F\u1C80-\u1C88\u1D2B\u1D78\u2DE0-\u2DFF\uA640-\uA69F\uFE2E\uFE2F]$/gu;

// Matches every run of quote-like characters: straight double and single quotes,
// backtick, acute accent, and the curly single/double quotation marks.
// The global flag lets callers strip all occurrences in one pass.
const quotesRegex = /[\u0022\u0027\u0060\u00B4\u2018\u2019\u201C\u201D]+/gu;

// Regex pattern to confirm that a word includes at least one Latin, Greek, or Cyrillic
// letter (ie: is not entirely composed of numbers, hyphens, punctuation, or other symbols).
//
// Why property escapes instead of raw ranges: ranges such as `a-å` or `w-я` span every
// code point in between, which includes '{', '|', '}', '~' and many Latin-1 symbols, and a
// bare '-' inside the class matches hyphens. That made words like "--" or "1-2" count as
// "containing letters". Script-based escapes only match characters of the named scripts.
//
// - The lookahead restricts the match to real letters (General_Category=Letter), because
//   the script properties also include a few marks and numeral signs.
// - Greek is included because the old `w-я` range covered the Greek block (U+0370–U+03FF),
//   so Greek names keep working as before.
// - No `g` flag: only a yes/no answer is needed, and a stateless regex is safe with `.test()`.
const containsLettersRegex =
  /(?=\p{Letter})[\p{Script=Latin}\p{Script=Greek}\p{Script=Cyrillic}]/u;

// Matches a whole string of two or more characters, each of which is a Unicode letter,
// an ASCII digit, a hyphen, or a code point inside the raw Latin-like ranges below.
// NOTE(review): the raw ranges (`a-å`, `a-ö`, `w-я`) also cover some non-letter code points
// such as '{', '|', '}' and '~' — confirm whether that breadth is intended.
const isAlphaNumericWordRegex = /^[\p{Letter}a-åa-ö-w-я0-9-]{2,}$/gu;

// Regex expression to confirm a string consists of exactly two or three characters in
// Chinese, Japanese, or Korean script. If so, it's probably in East Asian
// family-name-first format.
//
// Ranges covered:
//   \u3040-\u30ff  Hiragana and Katakana
//   \u3400-\u4dbf  CJK Unified Ideographs Extension A
//   \u4e00-\u9fff  CJK Unified Ideographs
//   \uac00-\ud7a3  Hangul syllables (needed for Korean names written in Hangul)
//   \uf900-\ufaff  CJK Compatibility Ideographs
//   \uff66-\uff9f  Halfwidth Katakana
//
// The pattern is anchored so the {2,3} bound applies to the whole string; without the
// anchors any string containing two adjacent CJK characters would match.
const isAsianScriptRegex =
  /^[\u3040-\u30ff\u3400-\u4dbf\u4e00-\u9fff\uac00-\ud7a3\uf900-\ufaff\uff66-\uff9f]{2,3}$/u;

// Regex pattern to check that a word consists entirely of two or more Cyrillic-script
// characters (no digits, hyphens, punctuation, or characters from other scripts).
//
// The pattern is anchored to the whole word. Unanchored, any word that merely contained
// two adjacent Cyrillic characters matched, so a chunk with trailing punctuation
// (eg: a surname followed by a comma) was accepted as a name.
// No `g` flag: callers only need a yes/no answer, and a stateless regex is safe with `.test()`.
const isCyrillicScriptRegex =
  /^[\u0400-\u0484\u0487-\u052F\u1C80-\u1C88\u1D2B\u1D78\u2DE0-\u2DFF\uA640-\uA69F\uFE2E\uFE2F]{2,}$/u;

// Regex pattern to check if a word looks like an email address:
//   <one or more characters> @ <domain labels separated by dots> . <alphabetic TLD>
// Domain labels may contain ASCII letters, digits, and hyphens, so addresses such as
// "jane@my-domain.com" or "jane@mail.my-domain.co.uk" are recognised.
// This is a loose heuristic, not an RFC 5322 validator; the end is intentionally unanchored.
const isEmailRegex = /^.+@[A-Za-z0-9-]+(?:\.[A-Za-z0-9-]+)*\.[A-Za-z]+/;

// Heuristic for East Asian family-name-first names: the whole name must be two or three
// UTF-16 code units long and must match isAsianScriptRegex.
// Returns `false` when the length is out of range, otherwise the raw match result
// (an array when matched, `null` when not) — callers only rely on truthiness.
const isAsianFormat = (name: string) => {
  const size = name.length;
  if (size <= 1 || size > 3) {
    return false;
  }
  return name.match(isAsianScriptRegex);
};

// Tests a word against isAlphaNumericWordRegex (two or more letters, digits, or hyphens).
// Returns the match result, or `null` when the word does not match.
const isAlphaNumericWord = (name: string) => {
  return name.match(isAlphaNumericWordRegex);
};

// Tests a word against isCyrillicScriptRegex (two or more Cyrillic characters).
// Returns the match result, or `null` when the word does not match.
const isCyrillicWord = (name: string) => {
  return name.match(isCyrillicScriptRegex);
};

// A word is treated as an initial when, once its first '.' is removed, what remains
// matches isAlphabeticRegex and is exactly one UTF-16 code unit long (eg: "J" or "J.").
// Returns `null` when the regex does not match, otherwise a boolean.
const isAnInitial = (name: string) => {
  const withoutDot = name.replace('.', '');
  const alphabeticMatch = withoutDot.match(isAlphabeticRegex);
  return alphabeticMatch && withoutDot.length === 1;
};

// True when the word has at least one match for containsLettersRegex, ie: it is not
// made up solely of numbers or symbols.
export const containsLetters = (name: string): boolean => {
  return name.match(containsLettersRegex) !== null;
};

// A name is probably an email address when it has no space characters and matches
// isEmailRegex (something, then '@', then a dotted domain).
export const isEmailFormat = (name: string): boolean => {
  if (name.includes(' ')) {
    return false;
  }
  return name.match(isEmailRegex) !== null;
};

/**
 *  Break a full name into chunks on whitespace and the symbols '.', '_', '+', '@', then
 *  return the first chunk that looks like a real word, or `undefined` when none does.
 *  - A chunk qualifies when it contains letters and is an alphanumeric or Cyrillic word.
 *  - Initials (eg: "J.S.") and honorifics (eg: "Prof.", "Dr.", "Ms.", etc.) are skipped.
 */
const firstAlphanumericWord = (name: string) => {
  for (const chunk of name.split(/[.\s_+@]/)) {
    // Chunks made only of digits/symbols (including empty chunks) are never names.
    if (!containsLetters(chunk)) {
      continue;
    }
    // Skip single-letter initials and titles such as "Dr" or "Prof".
    if (isAnInitial(chunk) || isHonorificPrefix(chunk)) {
      continue;
    }
    if (isAlphaNumericWord(chunk) || isCyrillicWord(chunk)) {
      return chunk;
    }
  }
  return undefined;
};

/**
 * Best-effort guess at a user's "given name" from whatever they typed as their full name.
 *
 * Steps, in order:
 *  1. An empty input yields an empty string.
 *  2. Quote-like characters are removed and surrounding whitespace is trimmed.
 *  3. An email-shaped value yields the part before '@', cut at the first '+' alias marker.
 *  4. A two- or three-character East Asian name yields everything after the first character.
 *  5. Otherwise the first word-like chunk is returned, falling back to the cleaned name.
 *
 * @param fullName - The raw name as entered by the user.
 * @returns The inferred display name; may be empty when the input is empty.
 */
export function generateDisplayName(fullName: string): string {
  if (fullName.length === 0) {
    return '';
  }

  // Drop every quote-like character, then trim leading/trailing whitespace.
  const cleaned = fullName.replaceAll(quotesRegex, '').trim();

  // Email address: keep the local part, without any "+alias" suffix.
  if (isEmailFormat(cleaned)) {
    const localPart = cleaned.split('@')[0];
    return localPart.split('+')[0];
  }

  // Family-name-first East Asian format: the first character is taken to be the family name.
  if (isAsianFormat(cleaned)) {
    return cleaned.slice(1);
  }

  // Prefer the first word-like chunk; if nothing qualifies, use the whole cleaned name.
  const givenName = firstAlphanumericWord(cleaned);
  return givenName ? givenName : cleaned;
}
