1 module Gargantext.Core.Text.Corpus.Parsers.FrameWrite where
3 import Control.Applicative ((*>))
4 import Control.Monad (void)
7 import Data.Text hiding (foldl)
8 import Gargantext.Prelude
10 import Text.Parsec hiding (Line)
11 import Text.Parsec.Number (number)
12 import Text.Parsec.String
15 -- https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/331
17 -- title : everything above the first ==
18 -- Authors : default : anonymous ; except if the following line is encountered ^@@authors: FirstName1, LastName1 ; FirstName2, LastName2 ; etc.
19 -- date : default : date of last change except if the following line is encountered ^@@date: 2021-09-10
20 -- source: Name of the root node except if the following line is encountered ^@@source:
-- By default, 1 framawrite node = 1 document. Option for further development: allow a split level to be given at generation time, so that a framawrite node can be split into several documents:
-- default: one document == 1 NodeWrite
-- ##: each ## section becomes a new document whose title is the subsubsection title. It either provides its own options (author, date, etc.) or inherits the document's options.
32 , "^@@authors: FirstName1, LastName1; FirstName2, LastName2"
33 , "^@@date: 2021-09-10"
34 , "^@@source: someSource"
35 , "document contents 1"
36 , "document contents 2"
39 sampleUnordered :: Text
45 , "document contents 1"
46 , "^@@date: 2021-09-10"
47 , "^@@authors: FirstName1, LastName1; FirstName2, LastName2"
48 , "^@@source: someSource"
49 , "document contents 2"
52 -- parseSample = parse documentP "sample" (unpack sample)
53 -- parseSampleUnordered = parse documentP "sampleUnordered" (unpack sampleUnordered)
-- | Result of running 'parseLines' over the in-file 'sample' fixture.
-- Handy for checking the parser from GHCi.
parseLinesSample :: Either ParseError Parsed
parseLinesSample = parseLines sample
-- | Result of running 'parseLines' over the in-file 'sampleUnordered' fixture.
-- Handy for checking the parser from GHCi.
parseLinesSampleUnordered :: Either ParseError Parsed
parseLinesSampleUnordered = parseLines sampleUnordered
60 Author { firstName :: Text
65 Parsed { title :: Text
68 , source :: Maybe Text
-- | Parse the text of a document into a 'Parsed' record.
--
-- The text is unpacked to a 'String' and run through 'documentLinesP'; the
-- resulting lines are then folded, left to right, into a record that starts
-- out as 'emptyParsed':
--
-- * authors, date, source and title lines overwrite the corresponding field,
--   so the last occurrence of each wins;
-- * contents lines are appended to the contents accumulated so far.
--
-- Returns 'Left' with the Parsec error when 'documentLinesP' fails.
--
-- NOTE(review): contents lines are joined with 'concat', i.e. without any
-- separator, so the line breaks between them are not restored -- confirm this
-- is intended.
parseLines :: Text -> Either ParseError Parsed
parseLines text = foldl step emptyParsed <$> parse documentLinesP "" (unpack text)
  where
    -- Fold one parsed line into the record built so far.
    step (Parsed { .. }) line =
      case line of
        LAuthors people  -> Parsed { authors = people, .. }
        LContents chunk  -> Parsed { contents = concat [contents, chunk], .. }
        LDate parsedDate -> Parsed { date = Just parsedDate, .. }
        LSource src      -> Parsed { source = Just src, .. }
        LTitle heading   -> Parsed { title = heading, .. }
104 documentLinesP :: Parser [Line]
107 ls <- lineP `sepBy` newline
108 pure $ [LTitle $ pack t] ++ ls
112 choice [ try authorsLineP
117 authorsLineP :: Parser Line
120 pure $ LAuthors authors
122 dateLineP :: Parser Line
127 sourceLineP :: Parser Line
130 pure $ LSource $ pack source
-- | Parse a plain contents line.
--
-- Consumes every character up to, but not including, the next @'\n'@ (possibly
-- none at all) and wraps the packed text in 'LContents'. Because it accepts
-- zero characters, this parser never fails.
contentsLineP :: Parser Line
contentsLineP = LContents . pack <$> many (noneOf "\n")
141 -- a <- optionMaybe authorsP
142 -- d <- optionMaybe dateP
143 -- s <- optionMaybe sourceP
145 -- pure $ Parsed { title = pack t
146 -- , authors = fromMaybe [] a
147 -- , date = pack <$> d
148 -- , source = pack <$> s
149 -- , contents = pack c }
151 titleDelimiterP :: Parser ()
-- | Parse the document title.
--
-- Collects characters one at a time until 'titleDelimiterP' succeeds. The
-- delimiter is consumed but is not part of the result. It is wrapped in 'try'
-- so that a partially matched delimiter is backtracked and its characters are
-- collected as title text instead. Fails if the input ends before a delimiter
-- is found.
titleP :: Parser [Char]
titleP = anyChar `manyTill` try titleDelimiterP
159 authorsPrefixP :: Parser [Char]
161 _ <- string "^@@authors:"
-- | Parse an authors line.
--
-- Expects the authors prefix ('authorsPrefixP', wrapped in 'try' so that a
-- failed prefix match consumes no input), followed by zero or more 'authorP'
-- entries separated by @';'@.
--
-- NOTE(review): 'authorP' reads the last name with @many (noneOf "\n")@, which
-- also accepts @';'@. The first author's last name therefore appears to swallow
-- the rest of the line, and the @';'@ separator may never be reached -- verify.
authorsP :: Parser [Author]
authorsP = do
  _ <- try authorsPrefixP
  authorP `sepBy` char ';'
165 authorP :: Parser Author
167 fn <- manyTill anyChar (char ',')
169 --ln <- manyTill anyChar (void (char ';') <|> tokenEnd)
170 --ln <- manyTill anyChar (tokenEnd)
171 ln <- many (noneOf "\n")
172 pure $ Author { firstName = pack fn, lastName = pack ln }
173 -- manyTill anyChar (void (char '\n') <|> eof)
175 datePrefixP :: Parser [Char]
177 _ <- string "^@@date:"
-- | Parse a date line and return its raw value.
--
-- Expects the date prefix ('datePrefixP', wrapped in 'try' so that a failed
-- prefix match consumes no input) and then returns every remaining character
-- up to, but not including, the next @'\n'@. The value is returned as-is; no
-- date validation happens here.
dateP :: Parser [Char]
dateP = prefix *> restOfLine
  where
    prefix     = try datePrefixP
    restOfLine = many (noneOf "\n")
183 dateISOP :: Parser Date
190 pure $ Date { year, month, day }
192 sourcePrefixP :: Parser [Char]
194 _ <- string "^@@source:"
-- | Parse a source line and return its raw value.
--
-- Expects the source prefix ('sourcePrefixP', wrapped in 'try' so that a
-- failed prefix match consumes no input) and then returns every remaining
-- character up to, but not including, the next @'\n'@.
sourceP :: Parser [Char]
sourceP = do
  _ <- try sourcePrefixP
  many (noneOf "\n")
200 -- contentsP :: Parser String
201 -- contentsP = many anyChar
-- | Succeeds at the end of a token: either a single @'\n'@ (which is consumed)
-- or the end of the input (which consumes nothing). Fails on anything else.
tokenEnd :: Parser ()
tokenEnd = lineFeed <|> eof
  where
    -- A line feed whose character result is discarded.
    lineFeed = void (char '\n')