]> Git — Sourcephile - literate-phylomemy.git/blob - src/Phylomemy/Indexation.hs
init
[literate-phylomemy.git] / src / Phylomemy / Indexation.hs
1 {-# OPTIONS_GHC -Wno-orphans #-}
2
3 module Phylomemy.Indexation where
4
5 import Data.Eq (Eq)
6 import Data.Map.Strict qualified as Map
7 import Data.Monoid (Monoid (..))
8 import Data.Ord (Ord)
9 import Data.Semigroup (Semigroup (..))
10 import Data.Sequence qualified as Seq
11 import Data.Set qualified as Set
12 import Data.Text.Short (ShortText)
13 import Data.Validity (Validity (..), declare, delve, trivialValidation)
14 import Data.Validity.Map ()
15 import Data.Validity.Set ()
16 import Data.Validity.Time ()
17 import GHC.Generics (Generic)
18 import Text.Show (Show)
19
20 -- | A contiguous sequence of n terms
21 newtype Ngram = Ngram ShortText
22 deriving (Eq, Generic, Ord)
23 deriving stock (Show)
24
25 instance Validity Ngram where
26 validate = trivialValidation
27
28 -- | A 'Root' is a set of `Ngram`s conveying the same meaning
29 -- (according to the analyst).
30 data Root = Root
31 { rootLabel :: Ngram
32 , rootSynonyms :: Set.Set Ngram
33 }
34 deriving (Eq, Generic, Ord, Show)
35
36 instance Validity Root where
37 validate r =
38 mconcat
39 [ delve "rootLabel" (rootLabel r)
40 , declare
41 "The rootLabel is not a member of the rootSynonyms"
42 (Set.notMember (rootLabel r) (rootSynonyms r))
43 , delve "rootSynonyms" (rootSynonyms r)
44 ]
45
46 type Roots = Set.Set Root
47 type Foundations = Set.Set Root
48
49 data Document pos = Document
50 { documentPosition :: pos
51 -- ^ A position could be a date, a section, a page, an IP address, …
52 , documentRoots :: Map.Map Root ()
53 -- , documentContent :: a
54 }
55 deriving (Eq, Generic, Show)
56 instance Validity pos => Validity (Document pos)
57
58 type DocumentByRange range pos = Map.Map range (Seq.Seq (Document pos))
59
60 documentsByRange :: Ord range => (pos -> range) -> [Document pos] -> DocumentByRange range pos
61 documentsByRange mapKey docs =
62 Map.fromListWith
63 (<>)
64 [ (mapKey (documentPosition doc), Seq.singleton doc)
65 | doc <- docs
66 ]
67
68 data Range pos = Range
69 { rangeMin :: pos
70 , rangeMax :: pos
71 -- , periodScales :: [Scale]
72 }
73 deriving (Eq, Generic, Show)
74 instance Validity pos => Validity (Range pos)
75
76 type Vocabulary = Map.Map Root ()