]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/Corpus/Parsers/FrameWrite.hs
[frame write] date paser work, update lts to 18.12
[gargantext.git] / src / Gargantext / Core / Text / Corpus / Parsers / FrameWrite.hs
1 module Gargantext.Core.Text.Corpus.Parsers.FrameWrite where
2
3 import Control.Applicative ((*>))
4 import Control.Monad (void)
5 import Data.Either
6 import Data.Maybe
7 import Data.Text hiding (foldl)
8 import Gargantext.Prelude
9 import Prelude ((++))
10 import Text.Parsec hiding (Line)
11 import Text.Parsec.Number (number)
12 import Text.Parsec.String
13
14
15 -- https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/331
16
17 -- title : everything above the first ==
18 -- Authors : default : anonymous ; except if the following line is encountered ^@@authors: FirstName1, LastName1 ; FirstName2, LastName2 ; etc.
19 -- date : default : date of last change except if the following line is encountered ^@@date: 2021-09-10
20 -- source: Name of the root node except if the following line is encountered ^@@source:
21 -- By default, 1 framawrite node = 1 document. Option for further developments: allow to give a level at generation for the split within framawrite node : :
22 --
23 -- par défaut: un doc == 1 NodeWrite
24 -- ## mean each ## section will be a new document with title the subsubsection title. Either it features options for author, date etc. or it will inherit the document's option.
25
26 sample :: Text
27 sample =
28 unlines
29 [ "title1"
30 , "title2"
31 , "=="
32 , "^@@authors: FirstName1, LastName1; FirstName2, LastName2"
33 , "^@@date: 2021-09-10"
34 , "^@@source: someSource"
35 , "document contents 1"
36 , "document contents 2"
37 ]
38
39 sampleUnordered :: Text
40 sampleUnordered =
41 unlines
42 [ "title1"
43 , "title2"
44 , "=="
45 , "document contents 1"
46 , "^@@date: 2021-09-10"
47 , "^@@authors: FirstName1, LastName1; FirstName2, LastName2"
48 , "^@@source: someSource"
49 , "document contents 2"
50 ]
51
52 -- parseSample = parse documentP "sample" (unpack sample)
53 -- parseSampleUnordered = parse documentP "sampleUnordered" (unpack sampleUnordered)
54 parseLinesSample :: Either ParseError Parsed
55 parseLinesSample = parseLines sample
56 parseLinesSampleUnordered :: Either ParseError Parsed
57 parseLinesSampleUnordered = parseLines sampleUnordered
58
59 data Author =
60 Author { firstName :: Text
61 , lastName :: Text }
62 deriving (Show)
63
64 data Parsed =
65 Parsed { title :: Text
66 , authors :: [Author]
67 , date :: Maybe Text
68 , source :: Maybe Text
69 , contents :: Text }
70 deriving (Show)
71
72 emptyParsed :: Parsed
73 emptyParsed =
74 Parsed { title = ""
75 , authors = []
76 , date = Nothing
77 , source = Nothing
78 , contents = "" }
79
80 data Date =
81 Date { year :: Int
82 , month :: Int
83 , day :: Int }
84 deriving (Show)
85
86 data Line =
87 LAuthors [Author]
88 | LContents Text
89 | LDate Date
90 | LSource Text
91 | LTitle Text
92 deriving (Show)
93
94 parseLines :: Text -> Either ParseError Parsed
95 parseLines text = foldl f emptyParsed <$> lst
96 where
97 lst = parse documentLinesP "" (unpack text)
98 f (Parsed { .. }) (LAuthors as) = Parsed { authors = as, .. }
99 f (Parsed { .. }) (LContents c) = Parsed { contents = concat [contents, c], .. }
100 f (Parsed { .. }) (LDate d ) = Parsed { date = Just d, .. }
101 f (Parsed { .. }) (LSource s ) = Parsed { source = Just s, .. }
102 f (Parsed { .. }) (LTitle t ) = Parsed { title = t, .. }
103
104 documentLinesP :: Parser [Line]
105 documentLinesP = do
106 t <- titleP
107 ls <- lineP `sepBy` newline
108 pure $ [LTitle $ pack t] ++ ls
109
110 lineP :: Parser Line
111 lineP = do
112 choice [ try authorsLineP
113 , try dateLineP
114 , try sourceLineP
115 , contentsLineP ]
116
117 authorsLineP :: Parser Line
118 authorsLineP = do
119 authors <- authorsP
120 pure $ LAuthors authors
121
122 dateLineP :: Parser Line
123 dateLineP = do
124 date <- dateP
125 pure $ LDate date
126
127 sourceLineP :: Parser Line
128 sourceLineP = do
129 source <- sourceP
130 pure $ LSource $ pack source
131
132 contentsLineP :: Parser Line
133 contentsLineP = do
134 contents <- many (noneOf "\n")
135 pure $ LContents $ pack contents
136
137 --------------------
138
139 -- documentP = do
140 -- t <- titleP
141 -- a <- optionMaybe authorsP
142 -- d <- optionMaybe dateP
143 -- s <- optionMaybe sourceP
144 -- c <- contentsP
145 -- pure $ Parsed { title = pack t
146 -- , authors = fromMaybe [] a
147 -- , date = pack <$> d
148 -- , source = pack <$> s
149 -- , contents = pack c }
150
151 titleDelimiterP :: Parser ()
152 titleDelimiterP = do
153 _ <- newline
154 _ <- string "=="
155 tokenEnd
156 titleP :: Parser [Char]
157 titleP = manyTill anyChar (try titleDelimiterP)
158
159 authorsPrefixP :: Parser [Char]
160 authorsPrefixP = do
161 _ <- string "^@@authors:"
162 many (char ' ')
163 authorsP :: Parser [Author]
164 authorsP = try authorsPrefixP *> sepBy authorP (char ';')
165 authorP :: Parser Author
166 authorP = do
167 fn <- manyTill anyChar (char ',')
168 _ <- many (char ' ')
169 --ln <- manyTill anyChar (void (char ';') <|> tokenEnd)
170 --ln <- manyTill anyChar (tokenEnd)
171 ln <- many (noneOf "\n")
172 pure $ Author { firstName = pack fn, lastName = pack ln }
173 -- manyTill anyChar (void (char '\n') <|> eof)
174
175 datePrefixP :: Parser [Char]
176 datePrefixP = do
177 _ <- string "^@@date:"
178 many (char ' ')
179 dateP :: Parser [Char]
180 dateP = try datePrefixP
181 *> many (noneOf "\n")
182
183 dateISOP :: Parser Date
184 dateISOP = do
185 year <- number
186 _ <- char '-'
187 month <- number
188 _ <- char '-'
189 day <- number
190 pure $ Date { year, month, day }
191
192 sourcePrefixP :: Parser [Char]
193 sourcePrefixP = do
194 _ <- string "^@@source:"
195 many (char ' ')
196 sourceP :: Parser [Char]
197 sourceP = try sourcePrefixP
198 *> many (noneOf "\n")
199
200 -- contentsP :: Parser String
201 -- contentsP = many anyChar
202
203 tokenEnd :: Parser ()
204 tokenEnd = void (char '\n') <|> eof