2 Module : Gargantext.API.Node.DocumentsFromWriteNodes
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# LANGUAGE MonoLocalBinds #-}
13 {-# LANGUAGE TemplateHaskell #-}
14 {-# LANGUAGE TypeOperators #-}
16 module Gargantext.API.Node.DocumentsFromWriteNodes
19 -- import Data.Maybe (fromMaybe)
21 import Control.Lens ((^.))
23 import Data.Either (Either(..), rights)
24 import Data.Maybe (fromMaybe)
26 import Data.Text (Text)
27 import GHC.Generics (Generic)
28 import Gargantext.API.Admin.EnvTypes (Env, GargJob(..))
29 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
30 import Gargantext.API.Admin.Types (HasSettings)
31 import Gargantext.API.Prelude (GargM, GargError)
32 import Gargantext.Core (Lang(..))
33 import Gargantext.Core.Text.Corpus.Parsers.FrameWrite
34 import Gargantext.Core.Text.List.Social (FlowSocialListWith)
35 import Gargantext.Core.Text.Terms (TermType(..))
36 import Gargantext.Core.Types.Individu (User(..))
37 import Gargantext.Database.Action.Flow (flowDataText, DataText(..))
38 import Gargantext.Database.Action.Flow.Types (FlowCmdM)
39 import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
40 import Gargantext.Database.Admin.Types.Hyperdata.Frame
41 import Gargantext.Database.Admin.Types.Node
42 import Gargantext.Database.Query.Table.Node (getChildrenByType, getClosestParentIdByType', getNodeWith)
43 import Gargantext.Database.Schema.Node (node_hyperdata, node_name, node_date)
44 import Gargantext.Prelude
45 import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..))
46 import Gargantext.Core.Text.Corpus.Parsers.Date (split')
48 import Text.Read (readMaybe)
49 import qualified Data.List as List
50 import qualified Data.Text as T
51 -- import qualified Gargantext.Defaults as Defaults
53 ------------------------------------------------------------------------
54 type API = Summary " Documents from Write nodes."
55 :> AsyncJobs JobLog '[JSON] Params JobLog
56 ------------------------------------------------------------------------
61 , selection :: FlowSocialListWith
63 deriving (Generic, Show)
64 instance FromJSON Params where
65 parseJSON = genericParseJSON defaultOptions
66 instance ToJSON Params where
67 toJSON = genericToJSON defaultOptions
68 instance ToSchema Params
69 ------------------------------------------------------------------------
70 api :: UserId -> NodeId -> ServerT API (GargM Env GargError)
72 serveJobsAPI DocumentFromWriteNodeJob $ \jHandle p ->
73 documentsFromWriteNodes uId nId p jHandle
75 documentsFromWriteNodes :: (HasSettings env, FlowCmdM env err m, MonadJobStatus m)
81 documentsFromWriteNodes uId nId Params { selection, lang, paragraphs } jobHandle = do
82 markStarted 2 jobHandle
83 markProgress 1 jobHandle
85 mcId <- getClosestParentIdByType' nId NodeCorpus
89 let msg = T.pack $ "[G.A.N.DFWN] Node has no corpus parent: " <> show nId
90 markFailed (Just msg) jobHandle
93 frameWriteIds <- getChildrenByType nId NodeFrameWrite
95 -- https://write.frame.gargantext.org/<frame_id>/download
96 frameWrites <- mapM (\id -> getNodeWith id (Proxy :: Proxy HyperdataFrame)) frameWriteIds
98 frameWritesWithContents <- liftBase $
100 contents <- getHyperdataFrameContents (node ^. node_hyperdata)
101 pure (node, contents)
104 let paragraphs' = fromMaybe (7 :: Int) $ (readMaybe $ T.unpack paragraphs)
105 let parsedE = (\(node, contents)
106 -> hyperdataDocumentFromFrameWrite lang paragraphs' (node, contents)) <$> frameWritesWithContents
107 let parsed = List.concat $ rights parsedE
108 -- printDebug "DocumentsFromWriteNodes: uId" uId
109 _ <- flowDataText (RootId (NodeId uId))
110 (DataNew (Just $ fromIntegral $ length parsed, yieldMany parsed))
116 markProgress 1 jobHandle
118 ------------------------------------------------------------------------
119 hyperdataDocumentFromFrameWrite :: Lang -> Int -> (Node HyperdataFrame, T.Text) -> Either T.Text [HyperdataDocument]
120 hyperdataDocumentFromFrameWrite lang paragraphSize (node, contents) =
121 case parseLines contents of
122 Left _ -> Left "Error parsing node"
123 Right (Parsed { authors, contents = ctxts}) ->
124 let HyperdataFrame { _hf_base, _hf_frame_id } = node ^. node_hyperdata
125 authorJoinSingle (Author { firstName, lastName }) = T.concat [ lastName, ", ", firstName ]
126 authors' = T.concat $ authorJoinSingle <$> authors
129 (year',month',day') = split' (node^. node_date)
130 date' = Just $ T.concat [ T.pack $ show year', "-"
131 , T.pack $ show month', "-"
137 date' = (\(Date { year, month, day }) -> T.concat [ T.pack $ show year', "-"
138 , T.pack $ show month', "-"
139 , T.pack $ show day' ]) <$> date
140 year' = fromIntegral $ maybe Defaults.year (\(Date { year }) -> year) date
141 month' = maybe Defaults.month (\(Date { month }) -> fromIntegral month) date
142 day' = maybe Defaults.day (\(Date { day }) -> fromIntegral day) date
145 Right (List.map (\(t, ctxt) -> HyperdataDocument { _hd_bdd = Just "FrameWrite"
148 , _hd_uniqId = Nothing
149 , _hd_uniqIdBdd = Nothing
152 , _hd_authors = Just authors'
153 , _hd_institutes = Nothing
154 , _hd_source = Just $ node ^. node_name
155 , _hd_abstract = Just ctxt
156 , _hd_publication_date = date'
157 , _hd_publication_year = Just year'
158 , _hd_publication_month = Just month'
159 , _hd_publication_day = Just day'
160 , _hd_publication_hour = Nothing
161 , _hd_publication_minute = Nothing
162 , _hd_publication_second = Nothing
163 , _hd_language_iso2 = Just $ T.pack $ show lang }
164 ) (text2titleParagraphs paragraphSize ctxts)