1 module Gargantext.Core.Text.Corpus.Parsers.FrameWrite where
3 import Control.Applicative ((*>))
4 import Control.Monad (void)
7 import Data.Text hiding (foldl)
8 import Gargantext.Prelude
9 import Prelude ((++), read)
10 import Text.Parsec hiding (Line)
11 import Text.Parsec.String
14 -- https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/331
16 -- title : everything above the first ==
17 -- Authors : default : anonymous ; except if the following line is encountered ^@@authors: FirstName1, LastName1 ; FirstName2, LastName2 ; etc.
18 -- date : default : date of last change except if the following line is encountered ^@@date: 2021-09-10
19 -- source: Name of the root node except if the following line is encountered ^@@source:
20 -- By default, 1 framawrite node = 1 document. Option for further developments: allow to give a level at generation for the split within framawrite node : :
22 -- par défaut: un doc == 1 NodeWrite
23 -- ## mean each ## section will be a new document with title the subsubsection title. Either it features options for author, date etc. or it will inherit the document's option.
31 , "^@@authors: FirstName1, LastName1; FirstName2, LastName2"
32 , "^@@date: 2021-09-10"
33 , "^@@source: someSource"
34 , "document contents 1"
35 , "document contents 2"
38 sampleUnordered :: Text
44 , "document contents 1"
45 , "^@@date: 2021-09-10"
46 , "^@@authors: FirstName1, LastName1; FirstName2, LastName2"
47 , "^@@source: someSource"
48 , "document contents 2"
51 -- parseSample = parse documentP "sample" (unpack sample)
52 -- parseSampleUnordered = parse documentP "sampleUnordered" (unpack sampleUnordered)
53 parseLinesSample :: Either ParseError Parsed
54 parseLinesSample = parseLines sample
55 parseLinesSampleUnordered :: Either ParseError Parsed
56 parseLinesSampleUnordered = parseLines sampleUnordered
59 Author { firstName :: Text
64 Parsed { title :: Text
67 , source :: Maybe Text
80 Date { year :: Integer
93 parseLines :: Text -> Either ParseError Parsed
94 parseLines text = foldl f emptyParsed <$> lst
96 lst = parse documentLinesP "" (unpack text)
97 f (Parsed { .. }) (LAuthors as) = Parsed { authors = as, .. }
98 f (Parsed { .. }) (LContents c) = Parsed { contents = concat [contents, c], .. }
99 f (Parsed { .. }) (LDate d ) = Parsed { date = Just d, .. }
100 f (Parsed { .. }) (LSource s ) = Parsed { source = Just s, .. }
101 f (Parsed { .. }) (LTitle t ) = Parsed { title = t, .. }
103 documentLinesP :: Parser [Line]
106 ls <- lineP `sepBy` newline
107 pure $ [LTitle $ pack t] ++ ls
111 choice [ try authorsLineP
116 authorsLineP :: Parser Line
119 pure $ LAuthors authors
121 dateLineP :: Parser Line
126 sourceLineP :: Parser Line
129 pure $ LSource $ pack source
131 contentsLineP :: Parser Line
133 contents <- many (noneOf "\n")
134 pure $ LContents $ pack contents
140 -- a <- optionMaybe authorsP
141 -- d <- optionMaybe dateP
142 -- s <- optionMaybe sourceP
144 -- pure $ Parsed { title = pack t
145 -- , authors = fromMaybe [] a
146 -- , date = pack <$> d
147 -- , source = pack <$> s
148 -- , contents = pack c }
150 titleDelimiterP :: Parser ()
155 titleP :: Parser [Char]
156 titleP = manyTill anyChar (try titleDelimiterP)
158 authorsPrefixP :: Parser [Char]
160 _ <- string "^@@authors:"
162 authorsP :: Parser [Author]
163 authorsP = try authorsPrefixP *> sepBy authorP (char ';')
164 authorP :: Parser Author
166 fn <- manyTill anyChar (char ',')
168 --ln <- manyTill anyChar (void (char ';') <|> tokenEnd)
169 --ln <- manyTill anyChar (tokenEnd)
170 ln <- many (noneOf "\n")
171 pure $ Author { firstName = pack fn, lastName = pack ln }
172 -- manyTill anyChar (void (char '\n') <|> eof)
174 datePrefixP :: Parser [Char]
176 _ <- string "^@@date:"
179 dateP = try datePrefixP
181 -- *> many (noneOf "\n")
183 dateISOP :: Parser Date
185 year <- rd <$> number
187 month <- rd <$> number
190 _ <- many (noneOf "\n" )
191 pure $ Date { year, month, day }
193 rd = read :: [Char] -> Integer
196 sourcePrefixP :: Parser [Char]
198 _ <- string "^@@source:"
200 sourceP :: Parser [Char]
201 sourceP = try sourcePrefixP
202 *> many (noneOf "\n")
204 -- contentsP :: Parser String
205 -- contentsP = many anyChar
207 tokenEnd :: Parser ()
208 tokenEnd = void (char '\n') <|> eof