-- (pairing)
where
+import Debug.Trace (trace)
import Control.Lens (_Just, (^.))
+import Data.Hashable (Hashable)
import Data.HashMap.Strict (HashMap)
-import qualified Data.HashMap.Strict as HM
-import Data.Maybe (catMaybes, fromMaybe)
+import Data.Maybe (fromMaybe, catMaybes)
import Data.Set (Set)
+import Data.Text (Text)
import Gargantext.API.Ngrams.Tools
import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.API.Prelude (GargNoServer)
import Gargantext.Core
+import Gargantext.Core.Text.Metrics.CharByChar (levenshtein)
import Gargantext.Core.Types (TableResult(..))
import Gargantext.Core.Types.Main
import Gargantext.Database
import Gargantext.Database.Admin.Types.Hyperdata -- (HyperdataContact(..))
import Gargantext.Database.Admin.Types.Node -- (AnnuaireId, CorpusId, ListId, DocId, ContactId, NodeId)
import Gargantext.Database.Query.Prelude (leftJoin2, returnA, queryNodeNodeTable)
+import Gargantext.Database.Query.Table.Node (defaultList)
import Gargantext.Database.Query.Table.Node.Children (getAllContacts)
import Gargantext.Database.Query.Table.Node.Select (selectNodesWithUsername)
-import Gargantext.Database.Query.Table.Node (defaultList)
+import Gargantext.Database.Query.Table.NodeContext_NodeContext (insertNodeContext_NodeContext)
import Gargantext.Database.Query.Table.NodeNode (insertNodeNode)
import Gargantext.Database.Schema.Ngrams -- (NgramsType(..))
import Gargantext.Database.Schema.Node
+-- import Gargantext.Database.Schema.Context
+import qualified Data.HashMap.Strict as HM
import Gargantext.Prelude hiding (sum)
import Opaleye
import qualified Data.HashMap.Strict as HashMap
import qualified Data.List as List
import qualified Data.Set as Set
-import qualified Data.Text as DT
+import qualified Data.Text as Text
-- | isPairedWith
-- All NodeAnnuaire paired with a Corpus of NodeId nId:
cond (node, node_node) = node^.node_id .== node_node^. nn_node2_id
-----------------------------------------------------------------------
-pairing :: AnnuaireId -> CorpusId -> Maybe ListId -> GargNoServer Int
+pairing :: AnnuaireId -> CorpusId -> Maybe ListId -> GargNoServer [Int]
pairing a c l' = do
l <- case l' of
Nothing -> defaultList c
Just l'' -> pure l''
- dataPaired <- dataPairing a (c,l,Authors) takeName takeName
- r <- insertDB $ prepareInsert dataPaired
- _ <- insertNodeNode [ NodeNode { _nn_node1_id = c
- , _nn_node2_id = a
- , _nn_score = Nothing
- , _nn_category = Nothing }]
- pure r
+ dataPaired <- dataPairing a (c,l,Authors)
+ _ <- insertNodeNode [ NodeNode c a Nothing Nothing]
+ insertNodeContext_NodeContext $ prepareInsert c a dataPaired
dataPairing :: AnnuaireId
-> (CorpusId, ListId, NgramsType)
- -> (ContactName -> Projected)
- -> (DocAuthor -> Projected)
-> GargNoServer (HashMap ContactId (Set DocId))
-dataPairing aId (cId, lId, ngt) fc fa = do
+dataPairing aId (cId, lId, ngt) = do
+ -- mc :: HM.HashMap ContactName (Set ContactId)
mc <- getNgramsContactId aId
- md <- getNgramsDocId cId lId ngt
-
- printDebug "ngramsContactId" mc
- printDebug "ngramsDocId" md
- let
- from = projectionFrom (Set.fromList $ HM.keys mc) fc
- to = projectionTo (Set.fromList $ HM.keys md) fa
-
- pure $ fusion mc $ align from to md
-
-
-
-prepareInsert :: HashMap ContactId (Set DocId) -> [NodeNode]
-prepareInsert m = map (\(n1,n2) -> NodeNode { _nn_node1_id = n1
- , _nn_node2_id = n2
- , _nn_score = Nothing
- , _nn_category = Nothing })
- $ List.concat
- $ map (\(contactId, setDocIds)
- -> map (\setDocId
- -> (contactId, setDocId)
- ) $ Set.toList setDocIds
- )
- $ HM.toList m
+ -- md :: HM.HashMap DocAuthor (Set DocId)
+ md <- getNgramsDocId cId lId ngt
+ -- printDebug "dataPairing authors" (HM.keys md)
+ let result = fusion mc md
+ -- printDebug "dataPairing" (length $ HM.keys result)
+ pure result
+
+
+prepareInsert :: CorpusId -> AnnuaireId -> HashMap ContactId (Set DocId)
+ -> [(CorpusId, AnnuaireId, DocId, ContactId)]
+prepareInsert corpusId annuaireId mapContactDocs =
+ map (\(contactId,docId) -> (corpusId, docId, annuaireId, contactId))
+ $ List.concat
+ $ map (\(contactId, setDocIds)
+ -> map (\setDocId
+ -> (contactId, setDocId)
+ ) $ Set.toList setDocIds
+ )
+ $ HM.toList mapContactDocs
------------------------------------------------------------------------
type ContactName = NgramsTerm
type DocAuthor = NgramsTerm
type Projected = NgramsTerm
-projectionFrom :: Set ContactName -> (ContactName -> Projected) -> HashMap ContactName Projected
-projectionFrom ss f = HM.fromList $ map (\s -> (s, f s)) (Set.toList ss) -- use HS.toMap
+fusion :: HashMap ContactName (Set ContactId)
+ -> HashMap DocAuthor (Set DocId)
+ -> HashMap ContactId (Set DocId)
+fusion mc md = HM.fromListWith (<>)
+ $ List.concat
+ $ map (\(docAuthor, docs)
+ -> case (getClosest Text.toLower docAuthor (HM.keys mc)) of
+ Nothing -> []
+ Just author -> case HM.lookup author mc of
+ Nothing -> []
+ Just contactIds -> map (\contactId -> (contactId, docs))
+ $ Set.toList contactIds
+ )
+ $ HM.toList md
+
+fusion'' :: HashMap ContactName (Set ContactId)
+ -> HashMap DocAuthor (Set DocId)
+ -> HashMap ContactId (Set DocId)
+fusion'' mc md = hashmapReverse $ fusion' mc (hashmapReverse md)
+
-projectionTo :: Set DocAuthor -> (DocAuthor -> Projected) -> HashMap Projected (Set DocAuthor)
-projectionTo ss f = HM.fromListWith (<>) $ map (\s -> (f s, Set.singleton s)) (Set.toList ss) -- use HS.toMap
-------------------------------------------------------------------------
-takeName :: NgramsTerm -> NgramsTerm
-takeName (NgramsTerm texte) = NgramsTerm $ DT.toLower texte'
+fusion' :: HashMap ContactName (Set ContactId)
+ -> HashMap DocId (Set DocAuthor)
+ -> HashMap DocId (Set ContactId)
+fusion' mc md = HM.fromListWith (<>)
+ $ map (\(docId, setAuthors) -> (docId, getContactIds mc $ getClosest' setAuthors (HM.keys mc)))
+ $ HM.toList md
+
+getContactIds :: HashMap ContactName (Set ContactId) -> Set ContactName -> Set ContactId
+getContactIds mapContactNames contactNames =
+ if Set.null contactNames
+ then Set.empty
+ else Set.unions $ catMaybes $ map (\contactName -> HM.lookup contactName mapContactNames) $ Set.toList contactNames
+
+getClosest' :: Set DocAuthor -> [ContactName] -> Set ContactName
+getClosest' setAuthors contactNames = trace (show (setAuthors, setContactNames)) $ setContactNames
where
- texte' = maybe texte (\x -> if DT.length x > 3 then x else texte)
- (lastName' texte)
- lastName' = lastMay . DT.splitOn " "
+ setContactNames = if Set.null xs then ys else xs
+ xs = Set.fromList $ catMaybes $ map (\author -> getClosest Text.toLower author contactNames) $ Set.toList setAuthors
+ ys = Set.fromList $ catMaybes $ map (\(NgramsTerm author) -> case ((lastMay . (Text.splitOn " ")) author) of
+ Nothing -> Nothing
+ Just authorReduced -> getClosest Text.toLower (NgramsTerm authorReduced) contactNames)
+ $ Set.toList setAuthors
-------------------------------------------------------------------------
-align :: HashMap ContactName Projected
- -> HashMap Projected (Set DocAuthor)
- -> HashMap DocAuthor (Set DocId)
- -> HashMap ContactName (Set DocId)
-align mc ma md = HM.fromListWith (<>)
- $ map (\c -> (c, getProjection md $ testProjection c mc ma))
- $ HM.keys mc
+getClosest :: (Text -> Text) -> NgramsTerm -> [NgramsTerm] -> Maybe NgramsTerm
+getClosest f (NgramsTerm from) candidates = fst <$> head scored
where
- getProjection :: HashMap DocAuthor (Set DocId) -> Set DocAuthor -> Set DocId
- getProjection ma' sa' =
- if Set.null sa'
- then Set.empty
- else Set.unions $ sets ma' sa'
- where
- sets ma'' sa'' = Set.map (\s -> lookup s ma'') sa''
- lookup s' ma''= fromMaybe Set.empty (HM.lookup s' ma'')
-
- testProjection :: ContactName
- -> HashMap ContactName Projected
- -> HashMap Projected (Set DocAuthor)
- -> Set DocAuthor
- testProjection cn' mc' ma' = case HM.lookup cn' mc' of
- Nothing -> Set.empty
- Just c -> case HM.lookup c ma' of
- Nothing -> Set.empty
- Just a -> a
+ scored = List.sortOn snd
+ $ List.filter (\(_,score) -> score <= 2)
+ $ map (\cand@(NgramsTerm candidate) -> (cand, levenshtein (f from) (f candidate))) candidates
-fusion :: HashMap ContactName (Set ContactId)
- -> HashMap ContactName (Set DocId)
- -> HashMap ContactId (Set DocId)
-fusion mc md = HM.fromListWith (<>)
- $ catMaybes
- $ [ (,) <$> Just cId <*> HM.lookup cn md
- | (cn, setContactId) <- HM.toList mc
- , cId <- Set.toList setContactId
- ]
-------------------------------------------------------------------------
+------------------------------------------------------------------------
getNgramsContactId :: AnnuaireId
-> Cmd err (HashMap ContactName (Set NodeId))
getNgramsContactId aId = do
contacts <- getAllContacts aId
- pure $ HM.fromListWith (<>)
- $ catMaybes
- $ map (\contact -> (,) <$> (NgramsTerm <$> contact^.(node_hyperdata . hc_who . _Just . cw_lastName))
- <*> Just ( Set.singleton (contact^.node_id))
+ -- printDebug "getAllContexts" (tr_count contacts)
+ let paired= HM.fromListWith (<>)
+ $ map (\contact -> (toName contact, Set.singleton (contact^.node_id))
) (tr_docs contacts)
-
+ -- printDebug "paired" (HM.keys paired)
+ pure paired
+-- POC here, should be a probabilistic function (see the one used to find lang)
+toName :: Node HyperdataContact -> NgramsTerm
+-- toName contact = NgramsTerm $ (Text.toTitle $ Text.take 1 firstName) <> ". " <> (Text.toTitle lastName)
+toName contact = NgramsTerm $ (Text.toTitle firstName) <> " " <> (Text.toTitle lastName)
+ where
+ firstName = fromMaybe "" $ contact^.(node_hyperdata . hc_who . _Just . cw_firstName)
+ lastName = fromMaybe "" $ contact^.(node_hyperdata . hc_who . _Just . cw_lastName)
getNgramsDocId :: CorpusId
-> ListId
-> GargNoServer (HashMap DocAuthor (Set NodeId))
getNgramsDocId cId lId nt = do
lIds <- selectNodesWithUsername NodeList userMaster
- repo <- getRepo' lIds
- let ngs = filterListWithRoot MapTerm $ mapTermListRoot [lId] nt repo
-
- groupNodesByNgrams ngs
- <$> getContextsByNgramsOnlyUser cId (lIds <> [lId]) nt (HashMap.keys ngs)
+ repo <- getRepo (lId:lIds)
+ let ngs = filterListWithRoot [MapTerm, CandidateTerm] $ mapTermListRoot (lId:lIds) nt repo
+ -- printDebug "getNgramsDocId" ngs
+
+ groupNodesByNgrams ngs <$> getContextsByNgramsOnlyUser cId (lIds <> [lId]) nt (HashMap.keys ngs)
+
+hashmapReverse :: (Ord a, Eq b, Hashable b)
+ => HashMap a (Set b) -> HashMap b (Set a)
+hashmapReverse m = HM.fromListWith (<>)
+ $ List.concat
+ $ map (\(k,vs) -> [ (v, Set.singleton k) | v <- Set.toList vs])
+ $ HM.toList m