1 module Gargantext.Core.Text.Corpus.Parsers.FrameWrite where
3 import Control.Applicative ((*>))
4 import Control.Monad (void)
7 import Gargantext.Prelude
8 import Prelude (String, (++))
9 import Text.Parsec hiding (Line)
10 import Text.Parsec.Combinator
11 import Text.Parsec.String
14 -- https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/331
16 -- title : everything above the first ==
-- authors: defaults to anonymous, unless a line of the following form is encountered: ^@@authors: FirstName1, LastName1 ; FirstName2, LastName2 ; etc.
-- date: defaults to the date of the last change, unless a line of the following form is encountered: ^@@date: 2021-09-10
-- source: defaults to the name of the root node, unless a line of the following form is encountered: ^@@source:
-- By default, 1 framawrite node = 1 document. Option for further development: allow a level to be given at generation time for splitting within a framawrite node:
-- by default: one document == 1 NodeWrite
-- ##: each ## section becomes a new document whose title is the subsubsection title. Such a document either carries its own options (author, date, etc.) or inherits the enclosing document's options.
30 , "^@@authors: FirstName1, LastName1; FirstName2, LastName2"
31 , "^@@date: 2021-09-10"
32 , "^@@source: someSource"
33 , "document contents 1"
34 , "document contents 2"
42 , "document contents 1"
43 , "^@@date: 2021-09-10"
44 , "^@@authors: FirstName1, LastName1; FirstName2, LastName2"
45 , "^@@source: someSource"
46 , "document contents 2"
-- Entry points that run the parsers of this module over the bundled samples.
-- Each one unpacks the sample text and hands it to parsec's 'parse'; the string
-- literal is the source name parsec attaches to its error positions.

-- | 'documentP' applied to 'sample'.
parseSample = parse documentP "sample" $ unpack sample

-- | 'documentP' applied to 'sampleUnordered'.
parseSampleUnordered = parse documentP "sampleUnordered" $ unpack sampleUnordered

-- | 'documentLinesP' applied to 'sample'.
parseLinesSample = parse documentLinesP "sample" $ unpack sample

-- | 'documentLinesP' applied to 'sampleUnordered'.
parseLinesSampleUnordered = parse documentLinesP "sampleUnordered" $ unpack sampleUnordered
55 Author { firstName :: Text
60 Parsed { title :: Text
63 , source :: Maybe Text
82 parseLines :: Text -> Parsed
83 parseLines text = foldl f emptyParsed lst
85 lst = parse documentLinesP "" (unpack text)
86 f (Parsed { .. }) (LAuthors as) = Parsed { authors = as, .. }
87 f (Parsed { .. }) (LDate d ) = Parsed { date = d, .. }
88 f (Parsed { .. }) (LSource s ) = Parsed { source = s, .. }
89 f (Parsed { .. }) (LContents c) = Parsed { contents = contents ++ c, .. }
90 f (Parsed { .. }) (LTitle t ) = Parsed { title = t, .. }
94 lines <- lineP `sepBy` newline
95 pure $ [LTitle $ pack t] ++ lines
99 choice [ try authorsLineP
106 pure $ LAuthors authors
110 pure $ LDate $ pack date
114 pure $ LSource $ pack source
117 contents <- many (noneOf "\n")
118 pure $ LContents $ pack contents
124 a <- optionMaybe authorsP
125 d <- optionMaybe dateP
126 s <- optionMaybe sourceP
128 pure $ Parsed { title = pack t
129 , authors = fromMaybe [] a
131 , source = pack <$> s
132 , contents = pack c }
-- | Title of a document: every character read until 'titleDelimiterP' first
-- succeeds. The delimiter itself is consumed but is not part of the result.
--
-- The delimiter is wrapped in 'try' so that a partial delimiter match is
-- rolled back and its characters are collected as title text instead.
titleP :: Parser [Char]
titleP = anyChar `manyTill` try titleDelimiterP
142 _ <- string "^@@authors:"
-- | Parses an authors line: the prefix recognised by 'authorsPrefixP', followed
-- by zero or more 'authorP' entries separated by @;@.
--
-- The prefix is wrapped in 'try' so that a line which does not start with it
-- fails without consuming input, leaving alternatives free to run.
authorsP :: Parser [Author]
authorsP = do
  _ <- try authorsPrefixP
  authorP `sepBy` char ';'
146 authorP :: Parser Author
148 fn <- manyTill anyChar (char ',')
150 --ln <- manyTill anyChar (void (char ';') <|> tokenEnd)
151 --ln <- manyTill anyChar (tokenEnd)
152 ln <- many (noneOf "\n")
153 pure $ Author { firstName = pack fn, lastName = pack ln }
154 -- manyTill anyChar (void (char '\n') <|> eof)
157 _ <- string "^@@date:"
-- | Parses a date line: the prefix recognised by 'datePrefixP', then the rest
-- of the line, returned verbatim.
--
-- Reading stops in front of the next newline (or at end of input); the newline
-- is left unconsumed. The prefix is wrapped in 'try' so that a non-matching
-- line fails without consuming input.
dateP :: Parser [Char]
dateP = do
  _ <- try datePrefixP
  many (noneOf "\n")
164 _ <- string "^@@source:"
-- | Parses a source line: the prefix recognised by 'sourcePrefixP', then the
-- rest of the line, returned verbatim.
--
-- Reading stops in front of the next newline (or at end of input); the newline
-- is left unconsumed. The prefix is wrapped in 'try' so that a non-matching
-- line fails without consuming input.
sourceP :: Parser [Char]
sourceP = do
  _ <- try sourcePrefixP
  many (noneOf "\n")
-- | Document contents: all input that is still unread, newlines included.
contentsP :: Parser String
contentsP = remainder
  where
    -- 'many' keeps going until 'anyChar' fails, which only happens at end of
    -- input, so this parser cannot fail and may return the empty string.
    remainder = many anyChar
-- Matches the end of a token: either a single newline character, which is
-- consumed and discarded, or the end of the input. Alternatives are tried in
-- that order.
tokenEnd = choice [void (char '\n'), eof]