2 Module : Gargantext.API.Node.DocumentsFromWriteNodes
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE MonoLocalBinds #-}
13 {-# LANGUAGE TemplateHaskell #-}
14 {-# LANGUAGE TypeOperators #-}
16 module Gargantext.API.Node.DocumentsFromWriteNodes
20 import Control.Lens ((^.))
22 import Data.Either (Either(..), rights)
24 import qualified Data.Text as T
25 import Gargantext.API.Admin.EnvTypes (Env, GargJob(..))
26 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
27 import Gargantext.API.Admin.Types (HasSettings)
28 import Gargantext.API.Job (jobLogSuccess, jobLogFailTotalWithMessage)
29 import Gargantext.API.Prelude (GargM, GargError)
30 import Gargantext.Core (Lang(..))
31 import Gargantext.Core.Text.Corpus.Parsers.FrameWrite
32 import Gargantext.Core.Text.Terms (TermType(..))
33 import Gargantext.Core.Types.Individu (User(..))
34 import Gargantext.Database.Action.Flow (flowDataText, DataText(..))
35 import Gargantext.Database.Action.Flow.Types (FlowCmdM)
36 import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
37 import Gargantext.Database.Admin.Types.Hyperdata.Frame
38 import Gargantext.Database.Admin.Types.Node
39 import Gargantext.Database.Query.Table.Node (getChildrenByType, getClosestParentIdByType', getNodeWith)
40 import Gargantext.Database.Schema.Node (node_hyperdata)
41 import qualified Data.List as List
42 import qualified Gargantext.Defaults as Defaults
43 import Gargantext.Prelude
44 import Gargantext.Utils.Jobs (serveJobsAPI)
45 import GHC.Generics (Generic)
47 import Data.Text (Text)
48 import Gargantext.Core.Text.List.Social (FlowSocialListWith)
49 import Text.Read (readMaybe)
50 import Data.Maybe (fromMaybe)
52 ------------------------------------------------------------------------
53 type API = Summary " Documents from Write nodes."
54 :> AsyncJobs JobLog '[JSON] Params JobLog
55 ------------------------------------------------------------------------
60 , selection :: FlowSocialListWith
62 deriving (Generic, Show)
63 instance FromJSON Params where
64 parseJSON = genericParseJSON defaultOptions
65 instance ToJSON Params where
66 toJSON = genericToJSON defaultOptions
67 instance ToSchema Params
68 ------------------------------------------------------------------------
69 api :: UserId -> NodeId -> ServerT API (GargM Env GargError)
71 serveJobsAPI DocumentFromWriteNodeJob $ \p log'' ->
75 in documentsFromWriteNodes uId nId p (liftBase . log')
77 documentsFromWriteNodes :: (HasSettings env, FlowCmdM env err m)
83 documentsFromWriteNodes uId nId Params { selection, lang, paragraphs } logStatus = do
84 let jobLog = JobLog { _scst_succeeded = Just 1
85 , _scst_failed = Just 0
86 , _scst_remaining = Just 1
87 , _scst_events = Just []
91 mcId <- getClosestParentIdByType' nId NodeCorpus
95 let msg = T.pack $ "[G.A.N.DFWN] Node has no corpus parent: " <> show nId
96 logStatus $ jobLogFailTotalWithMessage msg jobLog
99 frameWriteIds <- getChildrenByType nId NodeFrameWrite
101 -- https://write.frame.gargantext.org/<frame_id>/download
102 frameWrites <- mapM (\id -> getNodeWith id (Proxy :: Proxy HyperdataFrame)) frameWriteIds
104 frameWritesWithContents <- liftBase $
106 contents <- getHyperdataFrameContents (node ^. node_hyperdata)
107 pure (node, contents)
110 let paragraphs' = readMaybe $ T.unpack paragraphs :: Maybe Int
112 let parsedE = (\(node, contents) -> hyperdataDocumentFromFrameWrite (fromMaybe 7 paragraphs') (node ^. node_hyperdata, contents)) <$> frameWritesWithContents
113 -- TODO hard coded param should be take
114 let parsed = List.concat $ rights parsedE
116 _ <- flowDataText (RootId (NodeId uId)) (DataNew (Just $ fromIntegral $ length parsed, yieldMany parsed)) (Multi lang) cId (Just selection) logStatus
118 pure $ jobLogSuccess jobLog
119 ------------------------------------------------------------------------
120 hyperdataDocumentFromFrameWrite :: Int -> (HyperdataFrame, T.Text) -> Either T.Text [HyperdataDocument]
121 hyperdataDocumentFromFrameWrite paragraphSize (HyperdataFrame { _hf_base, _hf_frame_id }, contents) =
122 case parseLines contents of
123 Left _ -> Left "Error parsing node"
124 Right (Parsed { authors, contents = ctxts, date, source, title = t }) ->
125 let authorJoinSingle (Author { firstName, lastName }) = T.concat [ lastName, ", ", firstName ]
126 authors' = T.concat $ authorJoinSingle <$> authors
127 date' = (\(Date { year, month, day }) -> T.concat [ T.pack $ show year, "-"
128 , T.pack $ show month, "-"
129 , T.pack $ show day ]) <$> date
130 year' = fromIntegral $ maybe Defaults.year (\(Date { year }) -> year) date
131 month' = maybe Defaults.month (\(Date { month }) -> fromIntegral month) date
132 day' = maybe Defaults.day (\(Date { day }) -> fromIntegral day) date in
133 Right (List.map (\ctxt -> HyperdataDocument { _hd_bdd = Just "FrameWrite"
136 , _hd_uniqId = Nothing
137 , _hd_uniqIdBdd = Nothing
140 , _hd_authors = Just authors'
141 , _hd_institutes = Nothing
142 , _hd_source = source
143 , _hd_abstract = Just ctxt
144 , _hd_publication_date = date'
145 , _hd_publication_year = Just year'
146 , _hd_publication_month = Just month'
147 , _hd_publication_day = Just day'
148 , _hd_publication_hour = Nothing
149 , _hd_publication_minute = Nothing
150 , _hd_publication_second = Nothing
151 , _hd_language_iso2 = Just $ T.pack $ show EN }
152 ) (text2paragraphs paragraphSize ctxts)