{-# LANGUAGE DuplicateRecordFields #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE OverloadedStrings #-} {-# OPTIONS_GHC -fno-warn-orphans #-} module Hdoc.DTC.Index where import Control.Category import Control.Monad (Monad(..)) import Data.Bool import Data.Char (Char) import Data.Default.Class (Default(..)) import Data.Foldable (Foldable(..), concat) import Data.Function (($), const) import Data.Functor ((<$>)) import Data.Map.Strict (Map) import Data.Maybe (Maybe(..), maybe, listToMaybe) import Data.Monoid (Monoid(..)) import Data.Semigroup (Semigroup(..)) import Data.Sequence ((|>)) import Data.TreeMap.Strict (TreeMap(..)) import Data.TreeSeq.Strict (Tree(..), tree0) import qualified Data.Char as Char import qualified Data.List as List import qualified Data.Map.Strict as Map import qualified Data.Sequence as Seq import qualified Data.Strict.Maybe as Strict import qualified Data.Text.Lazy as TL import qualified Data.TreeMap.Strict as TreeMap -- import qualified Data.TreeSeq.Strict as TreeSeq import Hdoc.DTC.Document import qualified Hdoc.XML as XML -- * Type 'PathWord' type PathWord = TreeMap.Path Word pathFromWords :: Words -> Maybe PathWord pathFromWords ws = case ws >>= unSpace of p:ps | not (TL.null p) -> Just (TreeMap.path p ps) _ -> Nothing where unSpace = \case Space -> [] Word w -> [w] -- * Type 'Irefs' type Irefs = TreeMap Word [Anchor] irefsOfTerms :: Terms -> Irefs irefsOfTerms = TreeMap.fromList const . (>>= f) . concat where f [] = [] f ws = maybe [] (\p -> [(p,[])]) $ pathFromWords ws indexifyWords :: XML.Pos -> Irefs -> Words -> (Irefs, Plain) indexifyWords section = go mempty where go :: Plain -> Irefs -> Words -> (Irefs, Plain) go acc irefs inp = case inp of [] -> (irefs, acc) Space : next -> go (acc |> tree0 (PlainText " ")) irefs next Word w : next -> case goWords irefs [] inp of Nothing -> go (acc |> tree0 (PlainText w)) irefs next Just (anchor, ls, ns, rs) -> let iref_term = List.reverse ls in let lines = Seq.fromList $ tree0 . PlainText . plainifyWord <$> iref_term in go (acc |> Tree PlainIref { iref_term , iref_anchor=Just anchor } lines) rs ns goWords :: Irefs -> Words -> Words -> Maybe (Anchor, Words, Words, Irefs) goWords m@(TreeMap irefsByWord) prev inp = case inp of [] -> Nothing curr@Space : next -> goWords m (curr:prev) next curr@(Word w) : next -> case Map.lookup w irefsByWord of Nothing -> Nothing Just nod@TreeMap.Node{..} -> let prev' = curr:prev in case node_value of Strict.Nothing | null node_descendants -> Nothing | otherwise -> (<$> goWords node_descendants prev' next) $ \(anch, ls, ns, rs) -> (anch, ls, ns, TreeMap $ Map.insert w nod{TreeMap.node_descendants = rs} irefsByWord) Strict.Just anchs -> case goWords node_descendants prev' next of Nothing -> let anch = Anchor { anchor_count = maybe def (succNat1 . anchor_count) $ listToMaybe anchs , anchor_section = section } in Just (anch, prev', next, TreeMap $ Map.insert w nod{TreeMap.node_value = Strict.Just $ anch:anchs} irefsByWord) Just (anch, ls, ns, rs) -> Just (anch, ls, ns, TreeMap $ Map.insert w nod{TreeMap.node_descendants = rs} irefsByWord) wordify :: TL.Text -> Words wordify = List.reverse . go [] where go :: Words -> TL.Text -> Words go acc t = case TL.span Char.isAlphaNum t of ("",_) -> case TL.span Char.isSpace t of ("",_) -> case TL.uncons t of Nothing -> acc Just (c,r) -> go (Word (TL.singleton c) : acc) r (_s,r) -> go (Space : acc) r (w,r) -> go (Word w : acc) r plainifyWord :: WordOrSpace -> TL.Text plainifyWord = \case Word w -> w Space -> " " plainifyWords :: Words -> TL.Text plainifyWords = TL.concat . (plainifyWord <$>) termsByChar :: Terms -> Map Char Terms termsByChar = foldr (\aliases acc -> case aliases of (Word w:_):_ | not (TL.null w) -> Map.insertWith (<>) (Char.toUpper $ TL.index w 0) [aliases] acc _ -> acc ) Map.empty