{-# LANGUAGE DuplicateRecordFields #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE OverloadedStrings #-} {-# OPTIONS_GHC -fno-warn-orphans #-} module Hdoc.DTC.Index where import Control.Category import Control.Monad (Monad(..)) import Data.Bool import Data.Char (Char) import Data.Default.Class (Default(..)) import Data.Foldable (Foldable(..), concat) import Data.Function (($), const) import Data.Functor ((<$>)) import Data.Map.Strict (Map) import Data.Maybe (Maybe(..), maybe) import Data.Monoid (Monoid(..)) import Data.Semigroup (Semigroup(..)) import Data.Sequence ((|>)) import Data.TreeMap.Strict (TreeMap(..)) import Data.TreeSeq.Strict (Tree(..), tree0) import qualified Data.Char as Char import qualified Data.List as List import qualified Data.Map.Strict as Map import qualified Data.Sequence as Seq import qualified Data.Strict.Maybe as Strict import qualified Data.Text.Lazy as TL import qualified Data.TreeMap.Strict as TreeMap -- import qualified Data.TreeSeq.Strict as TreeSeq import Hdoc.DTC.Document -- * Type 'PathWord' type PathWord = TreeMap.Path Word pathFromWords :: Words -> Maybe PathWord pathFromWords ws = case ws >>= unSpace of p:ps | not (TL.null p) -> Just (TreeMap.path p ps) _ -> Nothing where unSpace = \case Space -> [] Word w -> [w] -- * Type 'Irefs' type Irefs = TreeMap Word [Anchor] irefsOfTerms :: Terms -> Irefs irefsOfTerms = TreeMap.fromList const . (>>= f) . concat where f [] = [] f ws = maybe [] (\p -> [(p,[])]) $ pathFromWords ws indexifyWords :: XmlPos -> Irefs -> Words -> (Irefs, Plain) indexifyWords section = go mempty where go :: Plain -> Irefs -> Words -> (Irefs, Plain) go acc irefs inp = case inp of [] -> (irefs, acc) Space : next -> go (acc |> tree0 (PlainText " ")) irefs next Word w : next -> case goWords irefs [] inp of Nothing -> go (acc |> tree0 (PlainText w)) irefs next Just (anch, ls, ns, rs) -> let term = List.reverse ls in let lines = Seq.fromList $ tree0 . PlainText . plainifyWord <$> term in go (acc |> Tree PlainIref{term, anchor=Just anch} lines) rs ns goWords :: Irefs -> Words -> Words -> Maybe (Anchor, Words, Words, Irefs) goWords m@(TreeMap irefsByWord) prev inp = case inp of [] -> Nothing curr@Space : next -> goWords m (curr:prev) next curr@(Word w) : next -> case Map.lookup w irefsByWord of Nothing -> Nothing Just nod@TreeMap.Node{..} -> let prev' = curr:prev in case node_value of Strict.Nothing | null node_descendants -> Nothing | otherwise -> (<$> goWords node_descendants prev' next) $ \(anch, ls, ns, rs) -> (anch, ls, ns, TreeMap $ Map.insert w nod{TreeMap.node_descendants = rs} irefsByWord) Strict.Just anchs -> case goWords node_descendants prev' next of Nothing -> let count = case anchs of [] -> def; Anchor{count=c}:_ -> succNat1 c in let anch = Anchor{count, section} in Just (anch, prev', next, TreeMap $ Map.insert w nod{TreeMap.node_value = Strict.Just $ anch:anchs} irefsByWord) Just (anch, ls, ns, rs) -> Just (anch, ls, ns, TreeMap $ Map.insert w nod{TreeMap.node_descendants = rs} irefsByWord) wordify :: TL.Text -> Words wordify = List.reverse . go [] where go :: Words -> TL.Text -> Words go acc t = case TL.span Char.isAlphaNum t of ("",_) -> case TL.span Char.isSpace t of ("",_) -> case TL.uncons t of Nothing -> acc Just (c,r) -> go (Word (TL.singleton c) : acc) r (_s,r) -> go (Space : acc) r (w,r) -> go (Word w : acc) r plainifyWord :: WordOrSpace -> TL.Text plainifyWord = \case Word w -> w Space -> " " plainifyWords :: Words -> TL.Text plainifyWords = TL.concat . (plainifyWord <$>) termsByChar :: Terms -> Map Char Terms termsByChar = foldr (\aliases acc -> case aliases of (Word w:_):_ | not (TL.null w) -> Map.insertWith (<>) (Char.toUpper $ TL.index w 0) [aliases] acc _ -> acc ) Map.empty