2 Module : Gargantext.API.Node.DocumentsFromWriteNodes
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE MonoLocalBinds #-}
13 {-# LANGUAGE TemplateHaskell #-}
14 {-# LANGUAGE TypeOperators #-}
15 {-# OPTIONS_GHC -fno-warn-orphans #-}
17 module Gargantext.API.Node.DocumentsFromWriteNodes
20 import Control.Lens ((^.))
22 import Data.Either (Either(..), rights)
24 import qualified Data.Text as T
25 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
26 import Gargantext.API.Admin.Types (HasSettings)
27 import Gargantext.API.Prelude (GargServer)
28 import Gargantext.Core (Lang(..))
29 import Gargantext.Core.Text.Corpus.Parsers.FrameWrite
30 import Gargantext.Core.Text.Terms (TermType(..))
31 import Gargantext.Core.Types.Individu (User(..))
32 import Gargantext.Database.Action.Flow (flowDataText, DataText(..))
33 import Gargantext.Database.Action.Flow.Types (FlowCmdM)
34 import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
35 import Gargantext.Database.Admin.Types.Hyperdata.Frame
36 import Gargantext.Database.Admin.Types.Node
37 import Gargantext.Database.Query.Table.Node (getChildrenByType, getClosestParentIdByType', getNodeWith)
38 import Gargantext.Database.Schema.Node (node_hyperdata)
39 import Gargantext.Prelude
40 import GHC.Generics (Generic)
42 import Servant.Job.Async (JobFunction(..), serveJobsAPI)
44 ------------------------------------------------------------------------
45 type API = Summary " Documents from Write nodes."
46 :> AsyncJobs JobLog '[JSON] Params JobLog
47 ------------------------------------------------------------------------
48 newtype Params = Params { id :: Int }
49 deriving (Generic, Show)
51 instance FromJSON Params where
52 parseJSON = genericParseJSON defaultOptions
53 instance ToJSON Params where
54 toJSON = genericToJSON defaultOptions
55 instance ToSchema Params
56 ------------------------------------------------------------------------
57 api :: UserId -> NodeId -> GargServer API
60 JobFunction (\p log'' ->
64 in documentsFromWriteNodes uId nId p (liftBase . log')
67 documentsFromWriteNodes :: (HasSettings env, FlowCmdM env err m)
73 documentsFromWriteNodes uId nId _p logStatus = do
75 logStatus JobLog { _scst_succeeded = Just 1
76 , _scst_failed = Just 0
77 , _scst_remaining = Just 1
78 , _scst_events = Just []
81 mcId <- getClosestParentIdByType' nId NodeCorpus
82 let cId = maybe (panic "[G.A.N.DFWN] Node has no parent") identity mcId
84 frameWriteIds <- getChildrenByType nId NodeFrameWrite
86 -- https://write.frame.gargantext.org/<frame_id>/download
87 frameWrites <- mapM (\id -> getNodeWith id (Proxy :: Proxy HyperdataFrame)) frameWriteIds
89 frameWritesWithContents <- liftBase $
91 contents <- getHyperdataFrameContents (node ^. node_hyperdata)
95 let parsedE = (\(node, contents) -> hyperdataDocumentFromFrameWrite (node ^. node_hyperdata, contents)) <$> frameWritesWithContents
96 let parsed = rights parsedE
98 _ <- flowDataText (RootId (NodeId uId)) (DataNew [parsed]) (Multi EN) cId Nothing
100 pure JobLog { _scst_succeeded = Just 2
101 , _scst_failed = Just 0
102 , _scst_remaining = Just 0
103 , _scst_events = Just []
105 ------------------------------------------------------------------------
106 hyperdataDocumentFromFrameWrite :: (HyperdataFrame, T.Text) -> Either T.Text HyperdataDocument
107 hyperdataDocumentFromFrameWrite (HyperdataFrame { _hf_base, _hf_frame_id }, contents) =
108 case parseLines contents of
109 Left _ -> Left "Error parsing node"
110 Right (Parsed { authors, contents = c, date, source, title = t }) ->
111 let authorJoinSingle (Author { firstName, lastName }) = T.concat [ lastName, ", ", firstName ]
112 authors' = T.concat $ authorJoinSingle <$> authors
113 date' = (\(Date { year, month, day }) -> T.concat [ T.pack $ show year, "-"
114 , T.pack $ show month, "-"
115 , T.pack $ show day ]) <$> date
116 year' = fromIntegral $ maybe 2021 (\(Date { year }) -> year) date
117 month' = fromIntegral $ maybe 10 (\(Date { month }) -> month) date
118 day' = fromIntegral $ maybe 4 (\(Date { day }) -> day) date in
119 Right HyperdataDocument { _hd_bdd = Just "FrameWrite"
122 , _hd_uniqId = Nothing
123 , _hd_uniqIdBdd = Nothing
126 , _hd_authors = Just authors'
127 , _hd_institutes = Nothing
128 , _hd_source = source
129 , _hd_abstract = Just c
130 , _hd_publication_date = date'
131 , _hd_publication_year = Just year'
132 , _hd_publication_month = Just month'
133 , _hd_publication_day = Just day'
134 , _hd_publication_hour = Nothing
135 , _hd_publication_minute = Nothing
136 , _hd_publication_second = Nothing
137 , _hd_language_iso2 = Just $ T.pack $ show EN }