]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/API/Node/DocumentsFromWriteNodes.hs
refactoring temporal matching
[gargantext.git] / src / Gargantext / API / Node / DocumentsFromWriteNodes.hs
1 {-|
2 Module : Gargantext.API.Node.DocumentsFromWriteNodes
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE MonoLocalBinds #-}
13 {-# LANGUAGE TemplateHaskell #-}
14 {-# LANGUAGE TypeOperators #-}
15
16 module Gargantext.API.Node.DocumentsFromWriteNodes
17 where
18
19 -- import Data.Maybe (fromMaybe)
20 import Conduit
21 import Control.Lens ((^.))
22 import Data.Aeson
23 import Data.Either (Either(..), rights)
24 import Data.Maybe (fromMaybe)
25 import Data.Swagger
26 import Data.Text (Text)
27 import GHC.Generics (Generic)
28 import Gargantext.API.Admin.EnvTypes (Env, GargJob(..))
29 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
30 import Gargantext.API.Admin.Types (HasSettings)
31 import Gargantext.API.Job (jobLogSuccess, jobLogFailTotalWithMessage)
32 import Gargantext.API.Prelude (GargM, GargError)
33 import Gargantext.Core (Lang(..))
34 import Gargantext.Core.Text.Corpus.Parsers.FrameWrite
35 import Gargantext.Core.Text.List.Social (FlowSocialListWith)
36 import Gargantext.Core.Text.Terms (TermType(..))
37 import Gargantext.Core.Types.Individu (User(..))
38 import Gargantext.Database.Action.Flow (flowDataText, DataText(..))
39 import Gargantext.Database.Action.Flow.Types (FlowCmdM)
40 import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
41 import Gargantext.Database.Admin.Types.Hyperdata.Frame
42 import Gargantext.Database.Admin.Types.Node
43 import Gargantext.Database.Query.Table.Node (getChildrenByType, getClosestParentIdByType', getNodeWith)
44 import Gargantext.Database.Schema.Node (node_hyperdata, node_name, node_date)
45 import Gargantext.Prelude
46 import Gargantext.Utils.Jobs (serveJobsAPI)
47 import Gargantext.Core.Text.Corpus.Parsers.Date (split')
48 import Servant
49 import Text.Read (readMaybe)
50 import qualified Data.List as List
51 import qualified Data.Text as T
52 -- import qualified Gargantext.Defaults as Defaults
53
54 ------------------------------------------------------------------------
55 type API = Summary " Documents from Write nodes."
56 :> AsyncJobs JobLog '[JSON] Params JobLog
57 ------------------------------------------------------------------------
58 data Params = Params
59 { id :: Int
60 , paragraphs :: Text
61 , lang :: Lang
62 , selection :: FlowSocialListWith
63 }
64 deriving (Generic, Show)
65 instance FromJSON Params where
66 parseJSON = genericParseJSON defaultOptions
67 instance ToJSON Params where
68 toJSON = genericToJSON defaultOptions
69 instance ToSchema Params
70 ------------------------------------------------------------------------
71 api :: UserId -> NodeId -> ServerT API (GargM Env GargError)
72 api uId nId =
73 serveJobsAPI DocumentFromWriteNodeJob $ \p log'' ->
74 let
75 log' x = do
76 liftBase $ log'' x
77 in documentsFromWriteNodes uId nId p (liftBase . log')
78
79 documentsFromWriteNodes :: (HasSettings env, FlowCmdM env err m)
80 => UserId
81 -> NodeId
82 -> Params
83 -> (JobLog -> m ())
84 -> m JobLog
85 documentsFromWriteNodes uId nId Params { selection, lang, paragraphs } logStatus = do
86 let jobLog = JobLog { _scst_succeeded = Just 1
87 , _scst_failed = Just 0
88 , _scst_remaining = Just 1
89 , _scst_events = Just []
90 }
91 logStatus jobLog
92
93 mcId <- getClosestParentIdByType' nId NodeCorpus
94 cId <- case mcId of
95 Just cId -> pure cId
96 Nothing -> do
97 let msg = T.pack $ "[G.A.N.DFWN] Node has no corpus parent: " <> show nId
98 logStatus $ jobLogFailTotalWithMessage msg jobLog
99 panic msg
100
101 frameWriteIds <- getChildrenByType nId NodeFrameWrite
102
103 -- https://write.frame.gargantext.org/<frame_id>/download
104 frameWrites <- mapM (\id -> getNodeWith id (Proxy :: Proxy HyperdataFrame)) frameWriteIds
105
106 frameWritesWithContents <- liftBase $
107 mapM (\node -> do
108 contents <- getHyperdataFrameContents (node ^. node_hyperdata)
109 pure (node, contents)
110 ) frameWrites
111
112 let paragraphs' = fromMaybe (7 :: Int) $ (readMaybe $ T.unpack paragraphs)
113 let parsedE = (\(node, contents)
114 -> hyperdataDocumentFromFrameWrite lang paragraphs' (node, contents)) <$> frameWritesWithContents
115 let parsed = List.concat $ rights parsedE
116 -- printDebug "DocumentsFromWriteNodes: uId" uId
117 _ <- flowDataText (RootId (NodeId uId))
118 (DataNew (Just $ fromIntegral $ length parsed, yieldMany parsed))
119 (Multi lang)
120 cId
121 (Just selection)
122 logStatus
123
124 pure $ jobLogSuccess jobLog
125
126 ------------------------------------------------------------------------
127 hyperdataDocumentFromFrameWrite :: Lang -> Int -> (Node HyperdataFrame, T.Text) -> Either T.Text [HyperdataDocument]
128 hyperdataDocumentFromFrameWrite lang paragraphSize (node, contents) =
129 case parseLines contents of
130 Left _ -> Left "Error parsing node"
131 Right (Parsed { authors, contents = ctxts}) ->
132 let HyperdataFrame { _hf_base, _hf_frame_id } = node ^. node_hyperdata
133 authorJoinSingle (Author { firstName, lastName }) = T.concat [ lastName, ", ", firstName ]
134 authors' = T.concat $ authorJoinSingle <$> authors
135
136 --{-
137 (year',month',day') = split' (node^. node_date)
138 date' = Just $ T.concat [ T.pack $ show year', "-"
139 , T.pack $ show month', "-"
140 , T.pack $ show day'
141 ]
142 --}
143
144 {-
145 date' = (\(Date { year, month, day }) -> T.concat [ T.pack $ show year', "-"
146 , T.pack $ show month', "-"
147 , T.pack $ show day' ]) <$> date
148 year' = fromIntegral $ maybe Defaults.year (\(Date { year }) -> year) date
149 month' = maybe Defaults.month (\(Date { month }) -> fromIntegral month) date
150 day' = maybe Defaults.day (\(Date { day }) -> fromIntegral day) date
151 --}
152 in
153 Right (List.map (\(t, ctxt) -> HyperdataDocument { _hd_bdd = Just "FrameWrite"
154 , _hd_doi = Nothing
155 , _hd_url = Nothing
156 , _hd_uniqId = Nothing
157 , _hd_uniqIdBdd = Nothing
158 , _hd_page = Nothing
159 , _hd_title = Just t
160 , _hd_authors = Just authors'
161 , _hd_institutes = Nothing
162 , _hd_source = Just $ node ^. node_name
163 , _hd_abstract = Just ctxt
164 , _hd_publication_date = date'
165 , _hd_publication_year = Just year'
166 , _hd_publication_month = Just month'
167 , _hd_publication_day = Just day'
168 , _hd_publication_hour = Nothing
169 , _hd_publication_minute = Nothing
170 , _hd_publication_second = Nothing
171 , _hd_language_iso2 = Just $ T.pack $ show lang }
172 ) (text2titleParagraphs paragraphSize ctxts)
173 )