def triplet_loss(y_true, y_pred, alpha = 0.2):
    """
    Implementation of the triplet loss as defined by formula (3)

    Arguments:
    y_true -- true labels, required when you define a loss in Keras; not actually used in this function.
    y_pred -- python list containing three objects:
            anchor -- the encodings for the anchor images, of shape (None, 128)
            positive -- the encodings for the positive images, of shape (None, 128)
            negative -- the encodings for the negative images, of shape (None, 128)

    Returns:
    loss -- real number, value of the loss
    """

    anchor, positive, negative = y_pred[0], y_pred[1], y_pred[2]

    ### START CODE HERE ### (≈ 4 lines)
    # Step 1: Compute the (encoding) distance between the anchor and the positive, summing over axis=-1
    pos_dist = tf.reduce_sum(tf.square(anchor - positive), axis=-1)
    # Step 2: Compute the (encoding) distance between the anchor and the negative, summing over axis=-1
    neg_dist = tf.reduce_sum(tf.square(anchor - negative), axis=-1)
    # Step 3: Subtract the two previous distances and add alpha.
    basic_loss = tf.add(tf.subtract(pos_dist, neg_dist), alpha)
    # Step 4: Take the maximum of basic_loss and 0.0. Sum over the training examples.
    loss = tf.reduce_sum(tf.maximum(basic_loss, 0.))
    ### END CODE HERE ###

    return loss
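A quick sanity check I find useful (my own sketch, TF1-style, with made-up shapes and seeds): feed random anchor/positive/negative encodings through the loss and evaluate it in a session.

with tf.Session() as test:
    tf.set_random_seed(1)
    y_true = (None, None, None)  # not used by triplet_loss
    y_pred = (tf.random_normal([3, 128], mean=6, stddev=0.1, seed=1),   # anchor encodings
              tf.random_normal([3, 128], mean=1, stddev=1, seed=1),     # positive encodings
              tf.random_normal([3, 128], mean=3, stddev=4, seed=1))     # negative encodings
    loss = triplet_loss(y_true, y_pred)
    print("loss = " + str(loss.eval()))

When training, the same function can be handed to Keras directly, e.g. FRmodel.compile(optimizer='adam', loss=triplet_loss, metrics=['accuracy']), assuming the model instance is named FRmodel.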
def verify(image_path, identity, database, model):
    """
    Function that verifies if the person on the "image_path" image is "identity".

    Arguments:
    image_path -- path to an image
    identity -- string, name of the person whose identity you'd like to verify. Has to be a resident of the Happy house.
    database -- python dictionary mapping allowed people's names (strings) to their encodings (vectors).
    model -- your Inception model instance in Keras

    Returns:
    dist -- distance between the image_path and the image of "identity" in the database.
    door_open -- True, if the door should open. False otherwise.
    """

    ### START CODE HERE ###

    # Step 1: Compute the encoding for the image. Use img_to_encoding(), see example above. (≈ 1 line)
    encoding = img_to_encoding(image_path, model)

    # Step 2: Compute distance with identity's image (≈ 1 line)
    dist = np.linalg.norm(encoding - database[identity])

    # Step 3: Open the door if dist < 0.7, else don't open (≈ 3 lines)
    if dist < 0.7:
        print("It's " + str(identity) + ", welcome home!")
        door_open = True
    else:
        print("It's not " + str(identity) + ", please go away")
        door_open = False

    ### END CODE HERE ###

    return dist, door_open
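A minimal usage sketch; the file name, the identity "younes", and the FRmodel variable are placeholders I'm assuming exist in your images folder and database, not fixed by the code above.

# Hypothetical call -- image path, identity, and FRmodel are assumptions:
dist, door_open = verify("images/camera_0.jpg", "younes", database, FRmodel)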
def who_is_it(image_path, database, model):
    """
    Implements face recognition for the happy house by finding who is the person on the image_path image.

    Arguments:
    image_path -- path to an image
    database -- database containing image encodings along with the name of the person on the image
    model -- your Inception model instance in Keras

    Returns:
    min_dist -- the minimum distance between image_path encoding and the encodings from the database
    identity -- string, the name prediction for the person on image_path
    """

    ### START CODE HERE ###

    ## Step 1: Compute the target "encoding" for the image. Use img_to_encoding(), see example above. ## (≈ 1 line)
    encoding = img_to_encoding(image_path, model)

    ## Step 2: Find the closest encoding ##

    # Initialize "min_dist" to a large value, say 100 (≈ 1 line)
    min_dist = 100

    # Loop over the database dictionary's names and encodings.
    for (name, db_enc) in database.items():

        # Compute L2 distance between the target "encoding" and the current "db_enc" from the database. (≈ 1 line)
        dist = np.linalg.norm(encoding - db_enc)

        # If this distance is less than the min_dist, then set min_dist to dist, and identity to name. (≈ 3 lines)
        if dist < min_dist:
            min_dist = dist
            identity = name

    ### END CODE HERE ###

    if min_dist > 0.7:
        print("Not in the database.")
    else:
        print("it's " + str(identity) + ", the distance is " + str(min_dist))

    return min_dist, identity
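Usage follows the same pattern as verify; again the image path and FRmodel are assumptions on my part.

# Hypothetical call -- same assumptions about the image path and FRmodel as above:
min_dist, identity = who_is_it("images/camera_0.jpg", database, FRmodel)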
def compute_content_cost(a_C, a_G):
    """
    Computes the content cost

    Arguments:
    a_C -- tensor of dimension (1, n_H, n_W, n_C), hidden layer activations representing content of the image C
    a_G -- tensor of dimension (1, n_H, n_W, n_C), hidden layer activations representing content of the image G

    Returns:
    J_content -- scalar that you compute using equation 1 above.
    """

    ### START CODE HERE ###
    # Retrieve dimensions from a_G (≈1 line)
    m, n_H, n_W, n_C = a_G.get_shape().as_list()

    # Reshape a_C and a_G (≈2 lines)
    a_C_unrolled = tf.reshape(a_C, [n_H * n_W, n_C])
    a_G_unrolled = tf.reshape(a_G, [n_H * n_W, n_C])

    # Compute the cost with tensorflow (≈1 line)
    J_content = tf.reduce_sum(tf.square(a_C_unrolled - a_G_unrolled)) / (n_H * n_W * n_C * 4)
    ### END CODE HERE ###

    return J_content
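A quick way to sanity-check the function (my own sketch, TF1-style, with arbitrary small shapes):

tf.reset_default_graph()

with tf.Session() as test:
    tf.set_random_seed(1)
    a_C = tf.random_normal([1, 4, 4, 3], mean=1, stddev=4)   # stand-in content activations
    a_G = tf.random_normal([1, 4, 4, 3], mean=1, stddev=4)   # stand-in generated activations
    J_content = compute_content_cost(a_C, a_G)
    print("J_content = " + str(J_content.eval()))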
Computing J_style(S, G)
To obtain the correlation (Gram) matrix, we need to unroll the 3-D activation volume into a 2-D matrix, transpose it, and take a matrix product.
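To make the shapes concrete, here is a small numpy sketch of the same unroll / transpose / matmul steps (the sizes are arbitrary and purely illustrative):

import numpy as np

n_H, n_W, n_C = 2, 3, 4                      # tiny, arbitrary activation volume
a = np.random.randn(1, n_H, n_W, n_C)        # shape (1, n_H, n_W, n_C), like a_S or a_G

A = a.reshape(n_H * n_W, n_C).T              # unroll to (n_H*n_W, n_C), then transpose to (n_C, n_H*n_W)
G = A.dot(A.T)                               # Gram matrix, shape (n_C, n_C)
print(G.shape)                               # (4, 4): entry G[i, j] is the correlation between filters i and j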
# GRADED FUNCTION: gram_matrix
def gram_matrix(A):
    """
    Argument:
    A -- matrix of shape (n_C, n_H*n_W)

    Returns:
    GA -- Gram matrix of A, of shape (n_C, n_C)
    """

    ### START CODE HERE ### (≈1 line)
    GA = tf.matmul(A, tf.transpose(A))
    ### END CODE HERE ###

    return GA
def compute_layer_style_cost(a_S, a_G):
    """
    Arguments:
    a_S -- tensor of dimension (1, n_H, n_W, n_C), hidden layer activations representing style of the image S
    a_G -- tensor of dimension (1, n_H, n_W, n_C), hidden layer activations representing style of the image G

    Returns:
    J_style_layer -- tensor representing a scalar value, style cost defined above by equation (2)
    """

    ### START CODE HERE ###
    # Retrieve dimensions from a_G (≈1 line)
    m, n_H, n_W, n_C = a_G.get_shape().as_list()

    # Reshape the images to have them of shape (n_C, n_H*n_W) (≈2 lines)
    a_S = tf.transpose(tf.reshape(a_S, [n_H*n_W, n_C]))
    a_G = tf.transpose(tf.reshape(a_G, [n_H*n_W, n_C]))

    # Computing gram_matrices for both images S and G (≈2 lines)
    GS = gram_matrix(a_S)
    GG = gram_matrix(a_G)

    # Computing the loss (≈1 line)
    J_style_layer = 1 / (4 * (n_C*n_W*n_H)**2) * tf.reduce_sum(tf.square(tf.subtract(GS, GG)))
    ### END CODE HERE ###

    return J_style_layer
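The graded part stops at the single-layer cost; the overall J_style used below is usually a weighted sum of this cost over several layers. A rough sketch of that aggregation (the layer names, the 0.2 coefficients, and the reliance on a global sess and model dict are assumptions on my part, not part of the graded code):

# Assumed layer/weight choices -- adjust to whatever layers your model exposes.
STYLE_LAYERS = [('conv1_1', 0.2), ('conv2_1', 0.2), ('conv3_1', 0.2),
                ('conv4_1', 0.2), ('conv5_1', 0.2)]

def compute_style_cost(model, STYLE_LAYERS):
    J_style = 0
    for layer_name, coeff in STYLE_LAYERS:
        out = model[layer_name]                        # output tensor of the chosen layer
        a_S = sess.run(out)                            # style activations (assumes the style image is currently assigned to the input)
        a_G = out                                      # generated-image activations, evaluated later during training
        J_style_layer = compute_layer_style_cost(a_S, a_G)
        J_style += coeff * J_style_layer
    return J_style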
def total_cost(J_content, J_style, alpha = 10, beta = 40):
    """
    Computes the total cost function

    Arguments:
    J_content -- content cost coded above
    J_style -- style cost coded above
    alpha -- hyperparameter weighting the importance of the content cost
    beta -- hyperparameter weighting the importance of the style cost

    Returns:
    J -- total cost as defined by the formula above.
    """

    ### START CODE HERE ### (≈1 line)
    J = alpha * J_content + beta * J_style
    ### END CODE HERE ###

    return J
### START CODE HERE ### (1 line)
J = total_cost(J_content, J_style, alpha = 10, beta = 40)
### END CODE HERE ###
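model_nn below relies on a train_step op to minimize J; roughly, it would be created like this (the Adam learning rate of 2.0 is an assumption, tune as needed):

# define the optimizer (learning rate is an assumption)
optimizer = tf.train.AdamOptimizer(2.0)

# define train_step to minimize the total cost J
train_step = optimizer.minimize(J)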
def model_nn(sess, input_image, num_iterations = 200):

    # Initialize global variables (you need to run the session on the initializer)
    ### START CODE HERE ### (1 line)
    sess.run(tf.global_variables_initializer())
    ### END CODE HERE ###

    # Run the noisy input image (initial generated image) through the model. Use assign().
    ### START CODE HERE ### (1 line)
    generated_image = sess.run(model['input'].assign(input_image))
    ### END CODE HERE ###

    for i in range(num_iterations):

        # Run the session on the train_step to minimize the total cost
        ### START CODE HERE ### (1 line)
        sess.run(train_step)
        ### END CODE HERE ###

        # Compute the generated image by running the session on the current model['input']
        ### START CODE HERE ### (1 line)
        generated_image = sess.run(model['input'])
        ### END CODE HERE ###

        # Print every 20 iterations.
        if i % 20 == 0:
            Jt, Jc, Js = sess.run([J, J_content, J_style])
            print("Iteration " + str(i) + " :")
            print("total cost = " + str(Jt))
            print("content cost = " + str(Jc))
            print("style cost = " + str(Js))

            # save current generated image in the "/output" directory
            save_image("output/" + str(i) + ".png", generated_image)

    # save last generated image
    save_image('output/generated_image.jpg', generated_image)

    return generated_image
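Finally, a hypothetical way to kick off the optimization; generate_noise_image, content_image, and the open session sess come from the exercise's helper utilities and setup, and are assumptions here.

# Assumptions: sess is an open TF session, content_image is already loaded and
# preprocessed, and generate_noise_image comes from the exercise's helpers.
generated_image = generate_noise_image(content_image)   # noisy copy of the content image as the starting point
model_nn(sess, generated_image, num_iterations = 200)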