1 {-# OPTIONS_GHC -Wno-orphans #-}
3 module Phylomemy.Indexation where
6 import Data.Map.Strict qualified as Map
7 import Data.Monoid (Monoid (..))
9 import Data.Semigroup (Semigroup (..))
10 import Data.Sequence qualified as Seq
11 import Data.Set qualified as Set
12 import Data.Text.Short (ShortText)
13 import Data.Validity (Validity (..), declare, delve, trivialValidation)
14 import Data.Validity.Map ()
15 import Data.Validity.Set ()
16 import Data.Validity.Time ()
17 import GHC.Generics (Generic)
18 import Text.Show (Show)
20 -- | A contiguous sequence of n terms
21 newtype Ngram = Ngram ShortText
22 deriving (Eq, Generic, Ord)
25 instance Validity Ngram where
26 validate = trivialValidation
28 -- | A 'Root' is a set of `Ngram`s conveying the same meaning
29 -- (according to the analyst).
32 , rootSynonyms :: Set.Set Ngram
34 deriving (Eq, Generic, Ord, Show)
36 instance Validity Root where
39 [ delve "rootLabel" (rootLabel r)
41 "The rootLabel is not a member of the rootSynonyms"
42 (Set.notMember (rootLabel r) (rootSynonyms r))
43 , delve "rootSynonyms" (rootSynonyms r)
46 type Roots = Set.Set Root
47 type Foundations = Set.Set Root
49 data Document pos = Document
50 { documentPosition :: pos
51 -- ^ A position could be a date, a section, a page, an IP address, …
52 , documentRoots :: Map.Map Root ()
53 -- , documentContent :: a
55 deriving (Eq, Generic, Show)
56 instance Validity pos => Validity (Document pos)
58 type DocumentByRange range pos = Map.Map range (Seq.Seq (Document pos))
60 documentsByRange :: Ord range => (pos -> range) -> [Document pos] -> DocumentByRange range pos
61 documentsByRange mapKey docs =
64 [ (mapKey (documentPosition doc), Seq.singleton doc)
68 data Range pos = Range
71 -- , periodScales :: [Scale]
73 deriving (Eq, Generic, Show)
74 instance Validity pos => Validity (Range pos)
76 type Vocabulary = Map.Map Root ()