From 38917c6afd1eaf3b789aac934e962588e766ed42 Mon Sep 17 00:00:00 2001 From: oskar Date: Fri, 19 Sep 2025 10:02:53 +0200 Subject: [PATCH] eduroam-prg-gm-1-3-245.net.univ-paris-diderot.fr 2025-9-19:10:2:53 --- .obsidian/graph.json | 2 +- M1 LOGOS .machine learning for NLP.md | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/.obsidian/graph.json b/.obsidian/graph.json index 3c074311..49337cc7 100644 --- a/.obsidian/graph.json +++ b/.obsidian/graph.json @@ -130,6 +130,6 @@ "repelStrength": 5.263671875, "linkStrength": 1, "linkDistance": 30, - "scale": 2.422433756553115, + "scale": 0.15541659193590038, "close": true } \ No newline at end of file diff --git a/M1 LOGOS .machine learning for NLP.md b/M1 LOGOS .machine learning for NLP.md index 8bdc12bc..6bd7543f 100644 --- a/M1 LOGOS .machine learning for NLP.md +++ b/M1 LOGOS .machine learning for NLP.md @@ -15,6 +15,10 @@ $x_{i} \in \mathbb{R}$ is a scalar one-hot : boolean vector with all zeroes but one value. Usefull if each dimension represents a word of the vocabulary +BOW : Bag Of Words You could represent sentences like that : Let our vocabulary be : `V = 'le' 'un' 'garcon' 'lit' 'livre' 'regarde'` -Then "le garcon lit le livre" would be written by counting the number of occurences of each word of the sentence in a vector, so `2 0 1 1 1 0` (the formula is ) \ No newline at end of file +Then "le garcon lit le livre" would be written by counting the number of occurrences of each word of the sentence in a vector, so `2 0 1 1 1 0` (the formula is `sentence +⌿⍤(∘.≡) vocabulary`) + +$\cos(u, v) = \frac{u\cdot v}{\|u\| \| v\|}$ +