2 Module : Gargantext.Text.Corpus.Parsers.Isidore
3 Description : To query French Humanities publication database
4 Copyright : (c) CNRS, 2019-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 - put endpoint in configuration file
12 - more flexible fields of research
14 - use more ontologies to help building corpora
18 {-# LANGUAGE ScopedTypeVariables #-}
20 module Gargantext.Text.Corpus.Parsers.Isidore where
22 import Control.Lens hiding (contains)
23 import Data.ByteString.Lazy (ByteString)
24 import Data.RDF hiding (triple, Query)
25 import Data.Text hiding (groupBy, map)
26 import Database.HSparql.Connection
27 import Database.HSparql.QueryGenerator
28 import Gargantext.Core (Lang)
29 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
30 import Gargantext.Prelude
31 import Network.Wreq (getWith, Response, defaults, header, param, responseStatus, responseBody)
32 import Prelude (String)
35 route = "https://isidore.science/sparql/"
37 selectQueryRaw' :: String -> String -> IO (Response ByteString)
38 selectQueryRaw' uri q = getWith opts uri
40 opts = defaults & header "Accept" .~ ["application/sparql-results+xml"]
41 & header "User-Agent" .~ ["gargantext-hsparql-client"]
42 & param "query" .~ [Data.Text.pack q]
44 isidoreGet :: Lang -> Int -> Text -> IO (Maybe [HyperdataDocument])
45 isidoreGet la li q = do
46 bindingValues <- isidoreGet' li q
48 Nothing -> pure Nothing
49 Just dv -> pure $ Just $ map (bind2doc la) dv
51 isidoreGet' :: Int -> Text -> IO (Maybe [[BindingValue]])
53 let s = createSelectQuery $ isidoreSelect l q
55 r <- selectQueryRaw' route s
56 putStrLn $ show $ r ^. responseStatus
57 pure $ structureContent $ r ^. responseBody
58 -- res <- selectQuery route $ simpleSelect q
61 isidoreSelect :: Int -> Text -> Query SelectQuery
62 isidoreSelect lim q = do
63 -- See Predefined Namespace Prefixes:
64 -- https://isidore.science/sparql?nsdecl
65 isidore <- prefix "isidore" (iriRef "http://isidore.science/class/")
66 rdf <- prefix "rdf" (iriRef "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
67 dcterms <- prefix "dcterms" (iriRef "http://purl.org/dc/terms/")
68 dc <- prefix "dc" (iriRef "http://purl.org/dc/elements/1.1/")
69 --iso <- prefix "fra" (iriRef "http://lexvo.org/id/iso639-3/")
70 --ore <- prefix "ore" (iriRef "http://www.openarchives.org/ore/terms/")
71 --bif <- prefix "bif" (iriRef "bif:")
83 triple_ link (rdf .:. "type") (isidore .:. "Document")
84 triple_ link (dcterms .:. "title") title
85 triple_ link (dcterms .:. "date") date
86 triple_ link (dcterms .:. "creator") authors
87 --triple_ link (dcterms .:. "language") langDoc
88 triple_ link (dc .:. "description") abstract
89 --triple_ link (ore .:. "isAggregatedBy") agg
90 --triple_ agg (dcterms .:. "title") title
92 optional_ $ triple_ link (dcterms .:. "source") source
93 optional_ $ triple_ link (dcterms .:. "publisher") publisher
95 -- TODO FIX BUG with (.||.) operator
96 --filterExpr_ $ (.||.) (contains title q) (contains abstract q)
97 --filterExpr_ (containsWith authors q) -- (contains abstract q)
98 --filterExpr_ (containsWith title q) -- (contains abstract q)
99 --filterExpr_ $ (.||.) (containsWith title q) (contains abstract q)
100 filterExpr_ (containsWith title q)
102 -- TODO FIX filter with lang
103 --filterExpr_ $ langMatches title (str ("fra" :: Text))
104 --filterExpr_ $ (.==.) langDoc (str ("http://lexvo.org/id/iso639-3/fra" :: Text))
109 selectVars [link, date, langDoc, authors, source, publisher, title, abstract]
111 -- | TODO : check if all cases are taken into account
112 unbound :: Lang -> BindingValue -> Maybe Text
113 unbound _ Unbound = Nothing
114 unbound _ (Bound (UNode x)) = Just x
115 unbound _ (Bound (LNode (TypedL x _))) = Just x
116 unbound _ (Bound (LNode (PlainL x))) = Just x
117 unbound l (Bound (LNode (PlainLL x l'))) = if l' == (toLower $ cs $ show l) then Just x else Nothing
118 unbound _ _ = Nothing
120 bind2doc :: Lang -> [BindingValue] -> HyperdataDocument
121 bind2doc l [ link, date, langDoc, authors, _source, publisher, title, abstract ] =
122 HyperdataDocument (Just "Isidore")
125 Nothing Nothing Nothing
129 (unbound l publisher)
132 Nothing Nothing Nothing Nothing Nothing Nothing
135 bind2doc _ _ = undefined