]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Node/DocumentsFromWriteNodes.hs
Merge remote-tracking branch 'origin/184-dev-add-nlp-to-gargantext-ini' into dev...
[gargantext.git] / src / Gargantext / API / Node / DocumentsFromWriteNodes.hs
1 {-|
2 Module : Gargantext.API.Node.DocumentsFromWriteNodes
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE MonoLocalBinds #-}
13 {-# LANGUAGE TemplateHaskell #-}
14 {-# LANGUAGE TypeOperators #-}
15
16 module Gargantext.API.Node.DocumentsFromWriteNodes
17 where
18
19 -- import Data.Maybe (fromMaybe)
20 import Conduit
21 import Control.Lens ((^.))
22 import Data.Aeson
23 import Data.Either (Either(..), rights)
24 import Data.Maybe (fromMaybe)
25 import Data.Swagger
26 import Data.Text (Text)
27 import GHC.Generics (Generic)
28 import Gargantext.API.Admin.EnvTypes (Env, GargJob(..))
29 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
30 import Gargantext.API.Admin.Types (HasSettings)
31 import Gargantext.API.Job (jobLogSuccess, jobLogFailTotalWithMessage)
32 import Gargantext.API.Prelude (GargM, GargError)
33 import Gargantext.Core (Lang(..))
34 import Gargantext.Core.Text.Corpus.Parsers.FrameWrite
35 import Gargantext.Core.Text.List.Social (FlowSocialListWith)
36 import Gargantext.Core.Text.Terms (TermType(..))
37 import Gargantext.Core.Types.Individu (User(..))
38 import Gargantext.Database.Action.Flow (flowDataText, DataText(..))
39 import Gargantext.Database.Action.Flow.Types (FlowCmdM)
40 import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
41 import Gargantext.Database.Admin.Types.Hyperdata.Frame
42 import Gargantext.Database.Admin.Types.Node
43 import Gargantext.Database.Query.Table.Node (getChildrenByType, getClosestParentIdByType', getNodeWith)
44 import Gargantext.Database.Schema.Node (node_hyperdata, node_name, node_date)
45 import Gargantext.Prelude
46 import Gargantext.Utils.Jobs (serveJobsAPI, jobHandleLogger)
47 import Gargantext.Core.Text.Corpus.Parsers.Date (split')
48 import Servant
49 import Text.Read (readMaybe)
50 import qualified Data.List as List
51 import qualified Data.Text as T
52 -- import qualified Gargantext.Defaults as Defaults
53
54 ------------------------------------------------------------------------
55 type API = Summary " Documents from Write nodes."
56 :> AsyncJobs JobLog '[JSON] Params JobLog
57 ------------------------------------------------------------------------
58 data Params = Params
59 { id :: Int
60 , paragraphs :: Text
61 , lang :: Lang
62 , selection :: FlowSocialListWith
63 }
64 deriving (Generic, Show)
65 instance FromJSON Params where
66 parseJSON = genericParseJSON defaultOptions
67 instance ToJSON Params where
68 toJSON = genericToJSON defaultOptions
69 instance ToSchema Params
70 ------------------------------------------------------------------------
71 api :: UserId -> NodeId -> ServerT API (GargM Env GargError)
72 api uId nId =
73 serveJobsAPI DocumentFromWriteNodeJob $ \jHandle p ->
74 documentsFromWriteNodes uId nId p (jobHandleLogger jHandle)
75
76 documentsFromWriteNodes :: (HasSettings env, FlowCmdM env err m)
77 => UserId
78 -> NodeId
79 -> Params
80 -> (JobLog -> m ())
81 -> m JobLog
82 documentsFromWriteNodes uId nId Params { selection, lang, paragraphs } logStatus = do
83 let jobLog = JobLog { _scst_succeeded = Just 1
84 , _scst_failed = Just 0
85 , _scst_remaining = Just 1
86 , _scst_events = Just []
87 }
88 logStatus jobLog
89
90 mcId <- getClosestParentIdByType' nId NodeCorpus
91 cId <- case mcId of
92 Just cId -> pure cId
93 Nothing -> do
94 let msg = T.pack $ "[G.A.N.DFWN] Node has no corpus parent: " <> show nId
95 logStatus $ jobLogFailTotalWithMessage msg jobLog
96 panic msg
97
98 frameWriteIds <- getChildrenByType nId NodeFrameWrite
99
100 -- https://write.frame.gargantext.org/<frame_id>/download
101 frameWrites <- mapM (\id -> getNodeWith id (Proxy :: Proxy HyperdataFrame)) frameWriteIds
102
103 frameWritesWithContents <- liftBase $
104 mapM (\node -> do
105 contents <- getHyperdataFrameContents (node ^. node_hyperdata)
106 pure (node, contents)
107 ) frameWrites
108
109 let paragraphs' = fromMaybe (7 :: Int) $ (readMaybe $ T.unpack paragraphs)
110 let parsedE = (\(node, contents)
111 -> hyperdataDocumentFromFrameWrite lang paragraphs' (node, contents)) <$> frameWritesWithContents
112 let parsed = List.concat $ rights parsedE
113 -- printDebug "DocumentsFromWriteNodes: uId" uId
114 _ <- flowDataText (RootId (NodeId uId))
115 (DataNew (Just $ fromIntegral $ length parsed, yieldMany parsed))
116 (Multi lang)
117 cId
118 (Just selection)
119 logStatus
120
121 pure $ jobLogSuccess jobLog
122
123 ------------------------------------------------------------------------
124 hyperdataDocumentFromFrameWrite :: Lang -> Int -> (Node HyperdataFrame, T.Text) -> Either T.Text [HyperdataDocument]
125 hyperdataDocumentFromFrameWrite lang paragraphSize (node, contents) =
126 case parseLines contents of
127 Left _ -> Left "Error parsing node"
128 Right (Parsed { authors, contents = ctxts}) ->
129 let HyperdataFrame { _hf_base, _hf_frame_id } = node ^. node_hyperdata
130 authorJoinSingle (Author { firstName, lastName }) = T.concat [ lastName, ", ", firstName ]
131 authors' = T.concat $ authorJoinSingle <$> authors
132
133 --{-
134 (year',month',day') = split' (node^. node_date)
135 date' = Just $ T.concat [ T.pack $ show year', "-"
136 , T.pack $ show month', "-"
137 , T.pack $ show day'
138 ]
139 --}
140
141 {-
142 date' = (\(Date { year, month, day }) -> T.concat [ T.pack $ show year', "-"
143 , T.pack $ show month', "-"
144 , T.pack $ show day' ]) <$> date
145 year' = fromIntegral $ maybe Defaults.year (\(Date { year }) -> year) date
146 month' = maybe Defaults.month (\(Date { month }) -> fromIntegral month) date
147 day' = maybe Defaults.day (\(Date { day }) -> fromIntegral day) date
148 --}
149 in
150 Right (List.map (\(t, ctxt) -> HyperdataDocument { _hd_bdd = Just "FrameWrite"
151 , _hd_doi = Nothing
152 , _hd_url = Nothing
153 , _hd_uniqId = Nothing
154 , _hd_uniqIdBdd = Nothing
155 , _hd_page = Nothing
156 , _hd_title = Just t
157 , _hd_authors = Just authors'
158 , _hd_institutes = Nothing
159 , _hd_source = Just $ node ^. node_name
160 , _hd_abstract = Just ctxt
161 , _hd_publication_date = date'
162 , _hd_publication_year = Just year'
163 , _hd_publication_month = Just month'
164 , _hd_publication_day = Just day'
165 , _hd_publication_hour = Nothing
166 , _hd_publication_minute = Nothing
167 , _hd_publication_second = Nothing
168 , _hd_language_iso2 = Just $ T.pack $ show lang }
169 ) (text2titleParagraphs paragraphSize ctxts)
170 )