]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Text/Corpus/Parsers/Isidore.hs
merge done
[gargantext.git] / src / Gargantext / Text / Corpus / Parsers / Isidore.hs
1 {-|
2 Module : Gargantext.Text.Corpus.Parsers.Isidore
3 Description : To query French Humanities publication database
4 Copyright : (c) CNRS, 2019-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 TODO:
11 - put endpoint in configuration file
12 - more flexible fields of research
13 - type database name
14 - use more ontologies to help building corpora
15 -}
16
17
18 {-# LANGUAGE ScopedTypeVariables #-}
19
20 module Gargantext.Text.Corpus.Parsers.Isidore where
21
22 import Control.Lens hiding (contains)
23 import Data.ByteString.Lazy (ByteString)
24 import Data.RDF hiding (triple, Query)
25 import Data.Text hiding (groupBy, map)
26 import Database.HSparql.Connection
27 import Database.HSparql.QueryGenerator
28 import Gargantext.Core (Lang)
29 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
30 import Gargantext.Prelude
31 import Network.Wreq (getWith, Response, defaults, header, param, responseStatus, responseBody)
32 import Prelude (String)
33
34 route :: EndPoint
35 route = "https://isidore.science/sparql/"
36
37 selectQueryRaw' :: String -> String -> IO (Response ByteString)
38 selectQueryRaw' uri q = getWith opts uri
39 where
40 opts = defaults & header "Accept" .~ ["application/sparql-results+xml"]
41 & header "User-Agent" .~ ["gargantext-hsparql-client"]
42 & param "query" .~ [Data.Text.pack q]
43
44 isidoreGet :: Lang -> Int -> Text -> IO (Maybe [HyperdataDocument])
45 isidoreGet la li q = do
46 bindingValues <- isidoreGet' li q
47 case bindingValues of
48 Nothing -> pure Nothing
49 Just dv -> pure $ Just $ map (bind2doc la) dv
50
51 isidoreGet' :: Int -> Text -> IO (Maybe [[BindingValue]])
52 isidoreGet' l q = do
53 let s = createSelectQuery $ isidoreSelect l q
54 putStrLn s
55 r <- selectQueryRaw' route s
56 putStrLn $ show $ r ^. responseStatus
57 pure $ structureContent $ r ^. responseBody
58 -- res <- selectQuery route $ simpleSelect q
59 -- pure res
60
61 isidoreSelect :: Int -> Text -> Query SelectQuery
62 isidoreSelect lim q = do
63 -- See Predefined Namespace Prefixes:
64 -- https://isidore.science/sparql?nsdecl
65 isidore <- prefix "isidore" (iriRef "http://isidore.science/class/")
66 rdf <- prefix "rdf" (iriRef "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
67 dcterms <- prefix "dcterms" (iriRef "http://purl.org/dc/terms/")
68 dc <- prefix "dc" (iriRef "http://purl.org/dc/elements/1.1/")
69 --iso <- prefix "fra" (iriRef "http://lexvo.org/id/iso639-3/")
70 --ore <- prefix "ore" (iriRef "http://www.openarchives.org/ore/terms/")
71 --bif <- prefix "bif" (iriRef "bif:")
72
73 link <- var
74 title <- var
75 date <- var
76 abstract <- var
77 authors <- var
78 source <- var
79 langDoc <- var
80 publisher <- var
81 --agg <- var
82
83 triple_ link (rdf .:. "type") (isidore .:. "Document")
84 triple_ link (dcterms .:. "title") title
85 triple_ link (dcterms .:. "date") date
86 triple_ link (dcterms .:. "creator") authors
87 --triple_ link (dcterms .:. "language") langDoc
88 triple_ link (dc .:. "description") abstract
89 --triple_ link (ore .:. "isAggregatedBy") agg
90 --triple_ agg (dcterms .:. "title") title
91
92 optional_ $ triple_ link (dcterms .:. "source") source
93 optional_ $ triple_ link (dcterms .:. "publisher") publisher
94
95 -- TODO FIX BUG with (.||.) operator
96 --filterExpr_ $ (.||.) (contains title q) (contains abstract q)
97 --filterExpr_ (containsWith authors q) -- (contains abstract q)
98 --filterExpr_ (containsWith title q) -- (contains abstract q)
99 --filterExpr_ $ (.||.) (containsWith title q) (contains abstract q)
100 filterExpr_ (containsWith title q)
101
102 -- TODO FIX filter with lang
103 --filterExpr_ $ langMatches title (str ("fra" :: Text))
104 --filterExpr_ $ (.==.) langDoc (str ("http://lexvo.org/id/iso639-3/fra" :: Text))
105
106 orderNextDesc date
107 limit_ lim
108 distinct_
109 selectVars [link, date, langDoc, authors, source, publisher, title, abstract]
110
111 -- | TODO : check if all cases are taken into account
112 unbound :: Lang -> BindingValue -> Maybe Text
113 unbound _ Unbound = Nothing
114 unbound _ (Bound (UNode x)) = Just x
115 unbound _ (Bound (LNode (TypedL x _))) = Just x
116 unbound _ (Bound (LNode (PlainL x))) = Just x
117 unbound l (Bound (LNode (PlainLL x l'))) = if l' == (toLower $ cs $ show l) then Just x else Nothing
118 unbound _ _ = Nothing
119
120 bind2doc :: Lang -> [BindingValue] -> HyperdataDocument
121 bind2doc l [ link, date, langDoc, authors, _source, publisher, title, abstract ] =
122 HyperdataDocument (Just "Isidore")
123 Nothing
124 (unbound l link)
125 Nothing Nothing Nothing
126 (unbound l title)
127 (unbound l authors)
128 Nothing
129 (unbound l publisher)
130 (unbound l abstract)
131 (unbound l date)
132 Nothing Nothing Nothing Nothing Nothing Nothing
133 (unbound l langDoc)
134
135 bind2doc _ _ = undefined