0
작동하지 않습니다. 스트리밍 메트릭을 올바르게 사용하고 있습니까? 문서에는 의사 코드 만 있습니다. 여기 Tensorflow 스트리밍 메트릭 내가 여기 알고리즘을 구현하기 위해 노력하고
import tensorflow as tf
import numpy as np
from nltk.corpus import brown
from gensim.models import Word2Vec
from sklearn.preprocessing import LabelBinarizer
from tensorflow.contrib.metrics import streaming_accuracy, streaming_recall
data = brown.tagged_sents()
tags = set()
for sent in data:
for token, tag in sent:
tags.add(tag)
label_processor = LabelBinarizer()
label_processor.fit(list(tags))
embedding_dim = 100
word2vec = Word2Vec(brown.sents(), size=embedding_dim, min_count=1)
embedding = word2vec.wv
del word2veC# Saves RAM
test = 0.1
val = 0.1
data_length = len(data)
inds = np.random.permutation(np.arange(data_length))
test_inds = inds[:int(data_length*test)]
val_inds = inds[int(data_length*test):int(data_length*(val+test))]
train_inds = inds[int(data_length*(val+test)):]
val_x = []
val_y = []
for i in val_inds:
x = []
tags = []
for token, tag in data[i]:
x.append(embedding[token])
tags.append(tag)
x = np.array(x)
x = x.reshape(x.shape[0], 1, x.shape[1], 1)
y = np.array(label_processor.transform(tags))
val_x.append(x)
val_y.append(y)
val_x = np.concatenate(val_x, axis=0)
val_y = np.concatenate(val_y, axis=0)
test_x = []
test_y = []
for i in test_inds:
x = []
tags = []
for token, tag in data[i]:
x.append(embedding[token])
tags.append(tag)
x = np.array(x)
x = x.reshape(x.shape[0], 1, x.shape[1], 1)
y = np.array(label_processor.transform(tags))
test_x.append(x)
test_y.append(y)
test_x = np.concatenate(test_x, axis=0)
test_y = np.concatenate(test_y, axis=0)
learning_rate = 0.001
n_iter = 12000
display_step = 100
depth = 5
label_processor
n_classes = label_processor.classes_.shape[0]
dropout_prob = 0.50
x = tf.placeholder(tf.float32, [None, 1, embedding_dim, 1])
y = tf.placeholder(tf.float32, [None, n_classes])
dropout = tf.placeholder(tf.float32, [])
depth_tensor = tf.constant(depth, tf.float32)
def data_processor(data, train_inds, word2vec, label_processor, n_iter):
inds = np.random.randint(len(train_inds), size=(n_iter))
for i in inds:
x = []
tags = []
for token, tag in data[train_inds[i]]:
x.append(word2vec[token])
tags.append(tag)
x = np.array(x)
x = x.reshape(x.shape[0], 1, x.shape[1], 1)
y = np.array(label_processor.transform(tags))
yield x, y
def model(x, y, weights, biases, dropout, depth_tensor):
net = tf.nn.dropout(x, dropout)
net = tf.nn.conv2d(net, weights['first'], strides=[1, 1, 1, 1], padding='SAME')
net = tf.nn.bias_add(net, biases['first'])
net = tf.nn.relu(net)
net_flat = tf.reshape(net, [-1, weights['out'].get_shape().as_list()[0]])
preds = tf.add(tf.matmul(net_flat, weights['out']), biases['out'])
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=preds, labels=y))
for i in range(1, depth):
net = tf.nn.dropout(net, dropout)
net = tf.nn.atrous_conv2d(net, weights['iterated'], rate=2**i, padding='SAME')
net = tf.nn.bias_add(net, biases['iterated'])
net = tf.nn.relu(net)
net_flat = tf.reshape(net, [-1, weights['out'].get_shape().as_list()[0]])
preds = tf.add(tf.matmul(net_flat, weights['out']), biases['out'])
cost += tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=preds, labels=y))
return preds, tf.divide(cost, depth_tensor)
weights = {'first': tf.Variable(tf.random_normal([1, 3, 1, 10])),
'iterated': tf.Variable(tf.random_normal([1, 3, 10, 10])),
'out': tf.Variable(tf.random_normal([embedding_dim*10, n_classes]))}
biases = {'first': tf.Variable(tf.random_normal([10])),
'iterated': tf.Variable(tf.random_normal([10])),
'out': tf.Variable(tf.random_normal([n_classes]))}
preds, cost = model(x, y, weights, biases, dropout, depth_tensor)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
accuracy, update_accuracy = streaming_accuracy(y, preds)
recall, update_recall = streaming_recall(y, preds)
init = tf.global_variables_initializer()
init2 = tf.local_variables_initializer()
with tf.Session() as sess:
sess.run(init)
i = 1
for batch_x, batch_y in data_processor(data, train_inds, embedding, label_processor, n_iter):
sess.run(optimizer,
feed_dict={x: batch_x, y: batch_y,
dropout: dropout_prob})
if i % display_step == 0:
loss = sess.run(cost,
feed_dict={x: batch_x, y: batch_y, dropout: dropout_prob})
print("Iter:{}, Minibatch Loss:{:.6f}".format(i,loss))
i += 1
sess.run(init2)
for batch_x, batch_y in data_processor(data, val_inds, embedding, label_processor, n_iter):
recall, accuracy = sess.run([update_recall, update_accuracy],
feed_dict={x:batch_x, y: batch_y, dropout: 1})
f1 = 2 * recall * accuracy/(recall + accuracy)
print("Testing Accuracy:", accuracy,"Testing Recall:", recall, "Testing F1 Score:", f1)
그리고
내가 스트리밍 메트릭을 사용하는 일부입니다 : 여기 내 코드입니다
accuracy, update_accuracy = streaming_accuracy(y, preds)
recall, update_recall = streaming_recall(y, preds)
init = tf.global_variables_initializer()
init2 = tf.local_variables_initializer()
with tf.Session() as sess:
sess.run(init)
i = 1
for batch_x, batch_y in data_processor(data, train_inds, embedding, label_processor, n_iter):
sess.run(optimizer,
feed_dict={x: batch_x, y: batch_y,
dropout: dropout_prob})
if i % display_step == 0:
loss = sess.run(cost,
feed_dict={x: batch_x, y: batch_y, dropout: dropout_prob})
print("Iter:{}, Minibatch Loss:{:.6f}".format(i,loss))
i += 1
sess.run(init2)
for batch_x, batch_y in data_processor(data, val_inds, embedding, label_processor, n_iter):
recall, accuracy = sess.run([update_recall, update_accuracy],
feed_dict={x:batch_x, y: batch_y, dropout: 1})
f1 = 2 * recall * accuracy/(recall + accuracy)
이와 같은 긴 코드 섹션에서 잘못된 점을 정확히 말하기는 어렵습니다. 공유 할 수있는 재현 할만한 작은 케이스가 있습니까? –
스트리밍 메트릭을 사용하는 부분을 분리했습니다. 이런 식으로 괜찮습니까? –