From 93f6d79129a9eeaa1586fda2b2413189f39b7f50 Mon Sep 17 00:00:00 2001 From: oskar Date: Fri, 19 Sep 2025 09:42:53 +0200 Subject: [PATCH] eduroam-prg-gm-1-3-245.net.univ-paris-diderot.fr 2025-9-19:9:42:53 --- M1 LOGOS .machine learning for NLP.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 M1 LOGOS .machine learning for NLP.md diff --git a/M1 LOGOS .machine learning for NLP.md b/M1 LOGOS .machine learning for NLP.md new file mode 100644 index 00000000..8bdc12bc --- /dev/null +++ b/M1 LOGOS .machine learning for NLP.md @@ -0,0 +1,20 @@ +--- +up: + - "[[M1 LOGOS]]" +tags: + - s/fac + - s/informatique +aliases: +--- + +# Vocabulary + +$\underbrace{(x_1, x_2, \dots, x_{n})}_{\text{vector of length } n} \in \mathbb{R}^{n}$ + +$x_{i} \in \mathbb{R}$ is a scalar + +one-hot : boolean vector whose entries are all zeros except a single 1. Useful when each dimension represents a word of the vocabulary + +You could represent sentences this way : +Let our vocabulary be : `V = 'le' 'un' 'garcon' 'lit' 'livre' 'regarde'` +Then "le garcon lit le livre" would be represented by counting the number of occurrences of each word of the vocabulary in the sentence and storing the counts in a vector, so `2 0 1 1 1 0` (the formula is $x_{i} = \#\{\, j : w_{j} = v_{i} \,\}$, where $w_{j}$ is the $j$-th word of the sentence and $v_{i}$ is the $i$-th word of `V`) \ No newline at end of file