module Worksheets.Text where

import Control.Monad (when)
import Data.ByteString.Builder (Builder)
import Data.Char qualified as Char
import Data.Foldable (foldMap)
import Data.List qualified as List
import Data.Map.Strict qualified as Map
import Data.Maybe (Maybe (..))
import Data.Text (Text)
import Data.Text qualified as Text
import Paths_worksheets qualified as Self
import System.FilePath.Posix ((</>))
import System.FilePath.Posix qualified as File
import Text.Blaze
import Text.Blaze.Html5 qualified as H
import Text.Blaze.Html5.Attributes qualified as HA
import Text.Blaze.Renderer.Utf8 qualified as Blaze
import Text.Show (Show (..))
import Prelude

type Texte = [TexteAtome]
data TexteAtome = TexteAtome
  { texteAtomeLangue :: Langue
  , texteAtomeGlyphs :: Text
  , texteAtomePhonetic :: Maybe Text
  }
  deriving (Eq, Ord, Show)

data Langue
  = LangueFrançais
  | LangueAnglais
  | LangueMandarin
  | LangueMandarinPinyin
  | LanguePhonetic
  deriving (Eq, Ord, Show)

splitOnWords :: Text -> [Text]
splitOnWords = splitOnChar splitterOnWords

splitOnChar :: (Char -> Bool) -> Text -> [Text]
splitOnChar f = go
  where
    go t =
      case f `Text.span` t of
        (l, r)
          | Text.null l && Text.null r -> []
          | Text.null l -> Text.singleton (Text.head t) : go (Text.tail t)
          | Text.null r -> [l]
          | otherwise -> l : go r

splitterOnWords :: Char -> Bool
splitterOnWords c = case Char.generalCategory c of
  Char.UppercaseLetter -> False
  Char.LowercaseLetter -> False
  Char.TitlecaseLetter -> False
  Char.ModifierLetter -> False
  Char.OtherLetter -> False
  Char.NonSpacingMark -> True
  Char.SpacingCombiningMark -> True
  Char.EnclosingMark -> True
  Char.DecimalNumber -> False
  Char.LetterNumber -> False
  Char.OtherNumber -> False
  Char.ConnectorPunctuation -> True
  Char.DashPunctuation -> True
  Char.OpenPunctuation -> True
  Char.ClosePunctuation -> True
  Char.InitialQuote -> True
  Char.FinalQuote -> True
  Char.OtherPunctuation -> True
  Char.MathSymbol -> True
  Char.CurrencySymbol -> True
  Char.ModifierSymbol -> True
  Char.OtherSymbol -> True
  Char.Space -> True
  Char.LineSeparator -> True
  Char.ParagraphSeparator -> True
  Char.Control -> True
  Char.Format -> True
  Char.Surrogate -> True
  Char.PrivateUse -> True
  Char.NotAssigned -> True

data CharClass
  = CharClassLetter
  | CharClassMark
  | CharClassNumber
  | CharClassPunctuation
  | CharClassQuote
  | CharClassSymbol
  | CharClassSeparator
  | CharClassOther
  deriving (Eq, Ord, Show)

charClassifiers =
  [ (CharClassLetter, isLetter)
  , (CharClassMark, isMark)
  , (CharClassNumber, isNumber)
  , (CharClassPunctuation, isPunctuation)
  , (CharClassQuote, isQuote)
  , (CharClassSymbol, isSymbol)
  , (CharClassSeparator, isSeparator)
  , (CharClassOther, isOther)
  ]
  where
    isLetter Char.UppercaseLetter = True
    isLetter Char.LowercaseLetter = True
    isLetter Char.TitlecaseLetter = True
    isLetter Char.ModifierLetter = True
    isLetter Char.OtherLetter = True
    isLetter _ = False
    isMark Char.NonSpacingMark = True
    isMark Char.SpacingCombiningMark = True
    isMark Char.EnclosingMark = True
    isMark _ = False
    isNumber Char.DecimalNumber = True
    isNumber Char.LetterNumber = True
    isNumber Char.OtherNumber = True
    isNumber _ = False
    isPunctuation Char.ConnectorPunctuation = True
    isPunctuation Char.DashPunctuation = True
    isPunctuation Char.OpenPunctuation = True
    isPunctuation Char.ClosePunctuation = True
    isPunctuation Char.OtherPunctuation = True
    isPunctuation _ = False
    isQuote Char.InitialQuote = True
    isQuote Char.FinalQuote = True
    isQuote _ = False
    isSymbol Char.MathSymbol = True
    isSymbol Char.CurrencySymbol = True
    isSymbol Char.ModifierSymbol = True
    isSymbol Char.OtherSymbol = True
    isSymbol _ = False
    isSeparator Char.Space = True
    isSeparator Char.LineSeparator = True
    isSeparator Char.ParagraphSeparator = True
    isSeparator _ = False
    isOther Char.Control = True
    isOther Char.Format = True
    isOther Char.Surrogate = True
    isOther Char.PrivateUse = True
    isOther Char.NotAssigned = True
    isOther _ = False

classifyText = go charClassifiers
  where
    go _ t | Text.null t = []
    go [] t = (CharClassOther, Text.singleton (Text.head t)) : go charClassifiers (Text.tail t)
    go ((clsD, clsF) : fs) t =
      case Text.span (clsF . Char.generalCategory) t of
        (l, r)
          | Text.null l -> go fs t
          | otherwise -> (clsD, l) : go charClassifiers r

isClassUnderscore (CharClassPunctuation, Text.all (== '_') -> True) = True
isClassUnderscore _ = False
isClassSeparator = \case
  (CharClassSeparator, _) -> True
  _ -> False
splitOnUnderscore = splitCharClass isClassUnderscore
splitOnSeparator = splitCharClass isClassSeparator

splitCharClass :: ((CharClass, Text) -> Bool) -> [(CharClass, Text)] -> [[(CharClass, Text)]]
splitCharClass isSep = go
  where
    go [] = []
    go ts =
      case ts & List.span isSep of
        ([], _r) ->
          case ts & List.break isSep of
            (l, r) -> l : go r
        (_l, r) -> go r

zipTexte :: Texte -> Texte -> [(TexteAtome, TexteAtome)]
zipTexte = List.zip

-- m0 = "开心果 冰淇淋 很 好吃"
-- p0 = "kāi_xīn_guǒ bīng_qí_lín hěn hào_chī"

splitMandarin t =
  t
    & classifyText
    & splitCharClass isClassSeparator
    <&> foldMap \(_cls, c) -> c & Text.unpack
splitPinyin t =
  t
    & classifyText
    & splitCharClass isClassSeparator
    <&> foldMap \ct@(_cls, c) -> [c | not (isClassUnderscore ct)]

-- z0 = List.zipWith (List.zip) m0 p0