```python
import numpy as np

def cosine_similarity(u, v):
    """
    Cosine similarity reflects the degree of similarity between u and v.

    Arguments:
        u -- a word vector of shape (n,)
        v -- a word vector of shape (n,)

    Returns:
        cosine_similarity -- the cosine similarity between u and v defined by the formula above.
    """
    ### START CODE HERE ###
    # Compute the dot product between u and v (≈1 line)
    dot = np.dot(u, v)
    # Compute the L2 norm of u (≈1 line)
    norm_u = np.sqrt(np.dot(u, u))
    # Compute the L2 norm of v (≈1 line)
    norm_v = np.sqrt(np.dot(v, v))
    # Compute the cosine similarity defined by formula (1) (≈1 line)
    cosine_similarity = dot / (norm_u * norm_v)
    ### END CODE HERE ###

    return cosine_similarity
```
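As a quick sanity check (a minimal sketch using made-up toy vectors rather than the GloVe embeddings loaded in the notebook), `cosine_similarity` should return values near 1 for vectors pointing in similar directions and exactly -1 for opposite ones:

```python
# Toy vectors for illustration only; the notebook itself works with 50-dimensional GloVe vectors.
u = np.array([1.0, 2.0, 3.0])
v = np.array([1.1, 2.1, 2.9])

print(cosine_similarity(u, v))   # close to 1.0: similar direction
print(cosine_similarity(u, -u))  # exactly -1.0: opposite direction
```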
Word analogy task
In the analogy task we complete "a is to b as c is to __", for example "man is to woman as king is to queen". We need to find the word d such that e_b − e_a ≈ e_d − e_c, i.e. the two difference vectors should be similar (again measured with cosine similarity).
```python
def complete_analogy(word_a, word_b, word_c, word_to_vec_map):
    """
    Performs the word analogy task as explained above: a is to b as c is to ____.

    Arguments:
        word_a -- a word, string
        word_b -- a word, string
        word_c -- a word, string
        word_to_vec_map -- dictionary that maps words to their corresponding vectors.

    Returns:
        best_word -- the word such that e_b - e_a is close to e_best_word - e_c, as measured by cosine similarity
    """
    # convert words to lower case
    word_a, word_b, word_c = word_a.lower(), word_b.lower(), word_c.lower()

    ### START CODE HERE ###
    # Get the word embeddings e_a, e_b and e_c (≈1-3 lines)
    e_a, e_b, e_c = word_to_vec_map[word_a], word_to_vec_map[word_b], word_to_vec_map[word_c]
    ### END CODE HERE ###

    words = word_to_vec_map.keys()
    max_cosine_sim = -100   # Initialize max_cosine_sim to a large negative number
    best_word = None        # Initialize best_word with None, it will help keep track of the word to output

    # loop over the whole word vector set
    for w in words:
        # to avoid best_word being one of the input words, skip them
        if w in [word_a, word_b, word_c]:
            continue

        ### START CODE HERE ###
        # Compute cosine similarity between the vector (e_b - e_a) and the vector ((w's vector representation) - e_c) (≈1 line)
        cosine_sim = cosine_similarity(e_b - e_a, word_to_vec_map[w] - e_c)

        # If the cosine_sim is greater than the max_cosine_sim seen so far,
        # then set the new max_cosine_sim to the current cosine_sim and the best_word to the current word (≈3 lines)
        if cosine_sim > max_cosine_sim:
            max_cosine_sim = cosine_sim
            best_word = w
        ### END CODE HERE ###

    return best_word
```
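A minimal usage sketch, assuming `word_to_vec_map` has already been loaded from the GloVe file earlier in the notebook; the triads below are just illustrative inputs:

```python
# Sketch: a few illustrative analogy triads (word_to_vec_map is assumed to be loaded).
triads = [('italy', 'italian', 'spain'), ('man', 'woman', 'king'), ('small', 'smaller', 'large')]
for a, b, c in triads:
    print('{} -> {} :: {} -> {}'.format(a, b, c, complete_analogy(a, b, c, word_to_vec_map)))
```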
```python
def neutralize(word, g, word_to_vec_map):
    """
    Removes the bias of "word" by projecting it on the space orthogonal to the bias axis.
    This function ensures that gender neutral words are zero in the gender subspace.

    Arguments:
        word -- string indicating the word to debias
        g -- numpy-array of shape (50,), corresponding to the bias axis (such as gender)
        word_to_vec_map -- dictionary mapping words to their corresponding vectors.

    Returns:
        e_debiased -- neutralized word vector representation of the input "word"
    """
    ### START CODE HERE ###
    # Select word vector representation of "word". Use word_to_vec_map. (≈ 1 line)
    e = word_to_vec_map[word]

    # Compute e_biascomponent using the formula given above. (≈ 1 line)
    e_biascomponent = np.dot(e, g) / np.square(np.linalg.norm(g)) * g

    # Neutralize e by subtracting e_biascomponent from it
    # e_debiased should be equal to its orthogonal projection. (≈ 1 line)
    e_debiased = e - e_biascomponent
    ### END CODE HERE ###

    return e_debiased
```
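One way to sanity-check `neutralize` is to compare a word's cosine similarity with the bias axis before and after the projection. This is a sketch only; it assumes the gender axis `g` is built from the "woman" and "man" vectors as in the exercise, and "receptionist" is just an illustrative word:

```python
# Sketch: the debiased vector should be (numerically) orthogonal to the bias axis.
g = word_to_vec_map['woman'] - word_to_vec_map['man']
word = 'receptionist'
print('before neutralizing:', cosine_similarity(word_to_vec_map[word], g))
print('after neutralizing :', cosine_similarity(neutralize(word, g, word_to_vec_map), g))  # ~0 up to float error
```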
```python
def equalize(pair, bias_axis, word_to_vec_map):
    """
    Debias gender specific words by following the equalize method described in the figure above.

    Arguments:
        pair -- pair of strings of gender specific words to debias, e.g. ("actress", "actor")
        bias_axis -- numpy-array of shape (50,), vector corresponding to the bias axis, e.g. gender
        word_to_vec_map -- dictionary mapping words to their corresponding vectors

    Returns:
        e_1 -- word vector corresponding to the first word
        e_2 -- word vector corresponding to the second word
    """
    ### START CODE HERE ###
    # Step 1: Select the word vector representation of each word in "pair". Use word_to_vec_map. (≈ 2 lines)
    w1, w2 = pair
    e_w1, e_w2 = word_to_vec_map[w1], word_to_vec_map[w2]

    # Step 2: Compute the mean of e_w1 and e_w2 (≈ 1 line)
    mu = (e_w1 + e_w2) / 2

    # Step 3: Compute the projections of mu over the bias axis and the orthogonal axis (≈ 2 lines)
    mu_B = np.dot(mu, bias_axis) / np.square(np.linalg.norm(bias_axis)) * bias_axis
    mu_orth = mu - mu_B

    # Step 4: Use equations (7) and (8) to compute e_w1B and e_w2B (≈ 2 lines)
    e_w1B = np.dot(e_w1, bias_axis) / np.square(np.linalg.norm(bias_axis)) * bias_axis
    e_w2B = np.dot(e_w2, bias_axis) / np.square(np.linalg.norm(bias_axis)) * bias_axis

    # Step 5: Adjust the bias part of e_w1B and e_w2B using the formulas (9) and (10) given above (≈ 2 lines)
    corrected_e_w1B = np.sqrt(np.abs(1 - np.sum(mu_orth ** 2))) * (e_w1B - mu_B) / np.linalg.norm(e_w1 - mu_orth - mu_B)
    corrected_e_w2B = np.sqrt(np.abs(1 - np.sum(mu_orth ** 2))) * (e_w2B - mu_B) / np.linalg.norm(e_w2 - mu_orth - mu_B)

    # Step 6: Debias by equalizing e1 and e2 to the sum of their corrected projections (≈ 2 lines)
    e1 = corrected_e_w1B + mu_orth
    e2 = corrected_e_w2B + mu_orth
    ### END CODE HERE ###

    return e1, e2
```
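A sketch of how `equalize` can be checked, assuming the same gender axis `g` as above: after equalization the two words should have bias-axis similarities of roughly equal magnitude and opposite sign.

```python
# Sketch: after equalization both words should sit symmetrically about the bias axis.
g = word_to_vec_map['woman'] - word_to_vec_map['man']
e1, e2 = equalize(('man', 'woman'), g, word_to_vec_map)
print(cosine_similarity(e1, g))  # roughly equal in magnitude to the next value...
print(cosine_similarity(e2, g))  # ...but with the opposite sign
```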
Part 2: Emojify!
Have you ever wanted to make your text messages more expressive? The emojifier app will help you do that. So rather than writing "Congratulations on the promotion! Lets get coffee and talk. Love you!", the emojifier can automatically turn this into "Congratulations on the promotion! 👍 Lets get coffee and talk. ☕️ Love you! ❤️".
You will implement a model that takes a sentence as input (such as "Let's go see the baseball game tonight!") and finds the most appropriate emoji for it (⚾️). In many emoji interfaces you have to remember that ❤️ is the "heart" symbol rather than the "love" symbol. With word vectors, however, even if your training set explicitly relates only a few words to a particular emoji, your algorithm can generalize and associate related words in the test set with the same emoji, even if those words never appeared in the training set. This lets you build an accurate classifier mapping sentences to emojis even with a small training set.
```python
def sentence_to_avg(sentence, word_to_vec_map):
    """
    Converts a sentence (string) into a list of words (strings). Extracts the GloVe representation of each word
    and averages its value into a single vector encoding the meaning of the sentence.

    Arguments:
        sentence -- string, one training example from X
        word_to_vec_map -- dictionary mapping every word in a vocabulary into its 50-dimensional vector representation

    Returns:
        avg -- average vector encoding information about the sentence, numpy-array of shape (50,)
    """
    ### START CODE HERE ###
    # Step 1: Split sentence into list of lower case words (≈ 1 line)
    words = sentence.lower().split()

    # Initialize the average word vector, should have the same shape as your word vectors.
    avg = np.zeros(word_to_vec_map[words[0]].shape)

    # Step 2: average the word vectors. You can loop over the words in the list "words".
    for w in words:
        avg += word_to_vec_map[w]
    avg = avg / len(words)
    ### END CODE HERE ###

    return avg
```
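A minimal usage sketch, assuming `word_to_vec_map` is already loaded; the sentence is just an example string:

```python
# Sketch: average the GloVe vectors of a short sentence (word_to_vec_map is assumed to be loaded).
avg = sentence_to_avg("Morrocan couscous is my favorite dish", word_to_vec_map)
print(avg.shape)  # (50,)
```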
```python
def model(X, Y, word_to_vec_map, learning_rate=0.01, num_iterations=400):
    """
    Model to train word vector representations in numpy.

    Arguments:
        X -- input data, numpy array of sentences as strings, of shape (m, 1)
        Y -- labels, numpy array of integers between 0 and 4, numpy-array of shape (m, 1)
        word_to_vec_map -- dictionary mapping every word in a vocabulary into its 50-dimensional vector representation
        learning_rate -- learning rate for the stochastic gradient descent algorithm
        num_iterations -- number of iterations

    Returns:
        pred -- vector of predictions, numpy-array of shape (m, 1)
        W -- weight matrix of the softmax layer, of shape (n_y, n_h)
        b -- bias of the softmax layer, of shape (n_y,)
    """
    np.random.seed(1)

    # Define number of training examples
    m = Y.shape[0]   # number of training examples
    n_y = 5          # number of classes
    n_h = 50         # dimensions of the GloVe vectors

    # Initialize parameters using Xavier initialization
    W = np.random.randn(n_y, n_h) / np.sqrt(n_h)
    b = np.zeros((n_y,))

    # Convert Y to Y_onehot with n_y classes
    Y_oh = convert_to_one_hot(Y, C=n_y)

    # Optimization loop
    for t in range(num_iterations):      # Loop over the number of iterations
        for i in range(m):               # Loop over the training examples

            ### START CODE HERE ### (≈ 4 lines of code)
            # Average the word vectors of the words from the i'th training example
            avg = sentence_to_avg(X[i], word_to_vec_map)

            # Forward propagate the avg through the softmax layer
            z = np.dot(W, avg) + b
            a = softmax(z)

            # Compute cost using the i'th training label's one hot representation and "a" (the output of the softmax)
            cost = -np.sum(Y_oh[i] * np.log(a))
            ### END CODE HERE ###

            # Compute gradients
            dz = a - Y_oh[i]
            dW = np.dot(dz.reshape(n_y, 1), avg.reshape(1, n_h))
            db = dz

            # Update parameters with Stochastic Gradient Descent
            W = W - learning_rate * dW
            b = b - learning_rate * db

        if t % 100 == 0:
            print("Epoch: " + str(t) + " --- cost = " + str(cost))
            pred = predict(X, Y, W, b, word_to_vec_map)

    return pred, W, b
```
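A usage sketch for training, assuming `X_train` and `Y_train` have been loaded by the notebook's data-loading helpers (those variable names are assumptions, not defined in this section):

```python
# Sketch: train the averaged-embedding softmax classifier
# (X_train and Y_train are assumed to be loaded earlier in the notebook).
pred, W, b = model(X_train, Y_train, word_to_vec_map, learning_rate=0.01, num_iterations=400)
```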
```python
def sentences_to_indices(X, word_to_index, max_len):
    """
    Converts an array of sentences (strings) into an array of indices corresponding to words in the sentences.
    The output shape should be such that it can be given to `Embedding()` (described in Figure 4).

    Arguments:
        X -- array of sentences (strings), of shape (m, 1)
        word_to_index -- a dictionary containing each word mapped to its index
        max_len -- maximum number of words in a sentence. You can assume every sentence in X is no longer than this.

    Returns:
        X_indices -- array of indices corresponding to words in the sentences from X, of shape (m, max_len)
    """
    m = X.shape[0]  # number of training examples

    ### START CODE HERE ###
    # Initialize X_indices as a numpy matrix of zeros and the correct shape (≈ 1 line)
    X_indices = np.zeros((m, max_len))

    for i in range(m):  # loop over training examples
        # Convert the ith training sentence to lower case and split it into words. You should get a list of words.
        sentence_words = X[i].lower().split()

        # Initialize j to 0
        j = 0

        # Loop over the words of sentence_words
        for w in sentence_words:
            # Set the (i,j)th entry of X_indices to the index of the correct word.
            X_indices[i, j] = word_to_index[w]
            # Increment j to j + 1
            j = j + 1
    ### END CODE HERE ###

    return X_indices
```
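A minimal usage sketch with a toy batch of sentences, assuming `word_to_index` is the GloVe word-to-index dictionary loaded earlier:

```python
# Sketch: a toy batch of sentences (word_to_index is assumed to be the GloVe index dictionary).
X1 = np.array(["funny lol", "lets play baseball", "food is ready for you"])
X1_indices = sentences_to_indices(X1, word_to_index, max_len=5)
print(X1_indices)  # each row holds word indices for one sentence, zero-padded up to max_len
```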
```python
def pretrained_embedding_layer(word_to_vec_map, word_to_index):
    """
    Creates a Keras Embedding() layer and loads in pre-trained GloVe 50-dimensional vectors.

    Arguments:
        word_to_vec_map -- dictionary mapping words to their GloVe vector representation.
        word_to_index -- dictionary mapping from words to their indices in the vocabulary (400,001 words)

    Returns:
        embedding_layer -- pretrained layer Keras instance
    """
    vocab_len = len(word_to_index) + 1              # adding 1 to fit Keras embedding (requirement)
    emb_dim = word_to_vec_map["cucumber"].shape[0]  # define dimensionality of your GloVe word vectors (= 50)

    ### START CODE HERE ###
    # Initialize the embedding matrix as a numpy array of zeros of shape (vocab_len, dimensions of word vectors = emb_dim)
    emb_matrix = np.zeros((vocab_len, emb_dim))

    # Set each row "index" of the embedding matrix to be the word vector representation of the "index"th word of the vocabulary
    for word, index in word_to_index.items():
        emb_matrix[index, :] = word_to_vec_map[word]

    # Define Keras embedding layer with the correct output/input sizes, and make it non-trainable
    # by setting trainable=False so the GloVe vectors are not updated during training.
    embedding_layer = Embedding(vocab_len, emb_dim, trainable=False)
    ### END CODE HERE ###

    # Build the embedding layer, it is required before setting the weights of the embedding layer. Do not modify the "None".
    embedding_layer.build((None,))

    # Set the weights of the embedding layer to the embedding matrix. Your layer is now pretrained.
    embedding_layer.set_weights([emb_matrix])

    return embedding_layer
```
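A quick sketch to confirm the layer was initialized from GloVe, assuming `word_to_vec_map` and `word_to_index` are loaded; it simply reads back one weight entry via Keras's `get_weights()`:

```python
# Sketch: build the frozen embedding layer and read back one of its weights.
embedding_layer = pretrained_embedding_layer(word_to_vec_map, word_to_index)
print("weights[0][1][3] =", embedding_layer.get_weights()[0][1][3])
```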
```python
def Emojify_V2(input_shape, word_to_vec_map, word_to_index):
    """
    Function creating the Emojify-v2 model's graph.

    Arguments:
        input_shape -- shape of the input, usually (max_len,)
        word_to_vec_map -- dictionary mapping every word in a vocabulary into its 50-dimensional vector representation
        word_to_index -- dictionary mapping from words to their indices in the vocabulary (400,001 words)

    Returns:
        model -- a model instance in Keras
    """
    ### START CODE HERE ###
    # Define sentence_indices as the input of the graph, it should be of shape input_shape and dtype 'int32' (as it contains indices).
    sentence_indices = Input(shape=input_shape, dtype='int32')

    # Create the embedding layer pretrained with GloVe Vectors (≈1 line)
    embedding_layer = pretrained_embedding_layer(word_to_vec_map, word_to_index)

    # Propagate sentence_indices through your embedding layer, you get back the embeddings
    embeddings = embedding_layer(sentence_indices)

    # Propagate the embeddings through an LSTM layer with 128-dimensional hidden state
    # Be careful, the returned output should be a batch of sequences.
    X = LSTM(128, return_sequences=True)(embeddings)
    # Add dropout with a probability of 0.5
    X = Dropout(0.5)(X)
    # Propagate X through another LSTM layer with 128-dimensional hidden state
    # Be careful, the returned output should be a single hidden state, not a batch of sequences.
    X = LSTM(128, return_sequences=False)(X)
    # Add dropout with a probability of 0.5
    X = Dropout(0.5)(X)
    # Propagate X through a Dense layer to get back a batch of 5-dimensional vectors.
    X = Dense(5)(X)
    # Add a softmax activation
    X = Activation('softmax')(X)

    # Create Model instance which converts sentence_indices into X.
    model = Model(inputs=sentence_indices, outputs=X)
    ### END CODE HERE ###

    return model
```
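A usage sketch for building and training the model. The names `maxLen`, `X_train`, `Y_train` and the helper `convert_to_one_hot` are assumed to come from earlier parts of the notebook, and the hyperparameters below are illustrative:

```python
# Sketch: build, compile and train Emojify-V2 (maxLen, X_train, Y_train and convert_to_one_hot
# are assumed to be defined elsewhere in the notebook).
model = Emojify_V2((maxLen,), word_to_vec_map, word_to_index)
model.summary()
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

X_train_indices = sentences_to_indices(X_train, word_to_index, maxLen)
Y_train_oh = convert_to_one_hot(Y_train, C=5)
model.fit(X_train_indices, Y_train_oh, epochs=50, batch_size=32, shuffle=True)
```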