The accuracy of my TensorFlow network does not seem to reflect its real ability to predict the data. I have a dataset stored in the variable data in the form shown in the code below; 'balance' is a long list of integers, and data contains 50000 elements.

I want the network to predict 'rank' taking 'balance' as input. After 5000 epochs of training, this is the output I get:

Epoch 5000 out of 5000 completed, loss: 9.91669559479 
Train accuracy: 0.992933 
Test accuracy: 0.9686 
Calculating errors... 
correct: 0.02336 

This is what I really do not understand. Below is the code I use to train and test the network:

import tensorflow as tf 
import numpy as np 
import multiprocessing as multip 

# this labels data so that a firm in class A has label [1, 0, 0, 0, 0, 0, 0], a firm in 
# class B [0, 1, 0, 0, 0, 0, 0] and so on 
def calc_label(data): 
    label = [0, 0, 0, 0, 0, 0, 0] 
    if data['rank'] == 'A': 
     label[0] = 1 
    elif data['rank'] == 'B': 
     label[1] = 1 
    elif data['rank'] == 'C': 
     label[2] = 1 
    elif data['rank'] == 'D': 
     label[3] = 1 
    elif data['rank'] == 'E': 
     label[4] = 1 
    elif data['rank'] == 'F': 
     label[5] = 1 
    elif data['rank'] == 'Def': 
     label[6] = 1 
    return label 


data = [ 
    {'index': 123, 
    'balance': [], 
    'probaility': 0.89, 
    'failed': True, 
    'rank': 'A'}, 
    {'index': 50234, 
    'balance': [], 
    'probaility': 0.45, 
    'failed': False, 
    'rank': 'B'}] 

features = [x['balance'] for x in data] 
labels = [calc_label(x) for x in data] 

train_size = int(len(labels) * 0.9) 
train_y = labels[:train_size] 
test_y = labels[train_size:] 
train_x = features[:train_size] 
test_x = features[train_size:] 

classes_n = len(labels[0]) 
nodes_per_layer = [100, 100] 
hidden_layers_n = len(nodes_per_layer) 
batch_size = 50000 
epochs = 500 
print_step = 50 
saving_step = 100 

x = tf.placeholder('float', [None, len(features[0])]) 
y = tf.placeholder('float', [None, classes_n]) 

current_epoch = tf.Variable(1) 

layers = [{'weights': tf.Variable(tf.random_normal([len(features[0]), nodes_per_layer[0]])), 
      'biases': tf.Variable(tf.random_normal([nodes_per_layer[0]]))}] 

for i in range(1, hidden_layers_n): 
    layers.append({'weights': tf.Variable(tf.random_normal([nodes_per_layer[i - 1], nodes_per_layer[i]])), 
        'biases': tf.Variable(tf.random_normal([nodes_per_layer[i]]))}) 

output_layer = {'weights': tf.Variable(tf.random_normal([nodes_per_layer[-1], classes_n])), 
       'biases': tf.Variable(tf.random_normal([classes_n]))} 


def neural_network_model(data): 
    l = [] 

    l.append(tf.add(tf.matmul(x, layers[0]['weights']), layers[0]['biases'])) 
    l[0] = tf.nn.relu(l[0]) 

    for i in range(1, hidden_layers_n): 
     l.append(tf.add(tf.matmul(l[i - 1], layers[i]['weights']), layers[i]['biases'])) 
     l[i] = tf.nn.relu(l[i]) 

    output = tf.add(tf.matmul(l[hidden_layers_n - 1], output_layer['weights']), output_layer['biases']) 

    return output 


def train_neural_network(x): 
    prediction = neural_network_model(x) 

    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y)) 

    optimizer = tf.train.AdamOptimizer().minimize(cost) 

    with tf.Session() as sess: 
     sess.run(tf.global_variables_initializer()) 
     epoch = 1 

     print('Starting training...') 
     while epoch <= epochs: 
      epoch_loss = 1 
      i = 0 
      while i < len(train_x): 
       start = i 
       end = i + batch_size 
       batch_x = np.array(train_x[start:end]) 
       batch_y = np.array(train_y[start:end]) 
       _, c = sess.run([optimizer, cost], feed_dict={x: batch_x, y: batch_y}) 
       epoch_loss += c 
       i += batch_size 

      if (epoch + 1) % print_step == 0: 
       print('Epoch', epoch + 1, 'out of', 
         '{} completed,'.format(epochs), 'loss:', epoch_loss) 
       correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1)) 
       accuracy = tf.reduce_mean(tf.cast(correct, 'float')) 
       accuracy_number = accuracy.eval({x: test_x, y: test_y}) 
       accuracy_number_training_set = accuracy.eval({x: train_x, y: train_y}) 
       print('Train accuracy:', accuracy_number_training_set) 
       print('Test accuracy:', accuracy_number) 
      epoch += 1 

train_neural_network(x) 


# this function converts predictions expressed as numbers into the letters corresponding to the different ranking 
# classes, for example 0 -> A, 1 -> B, 2 -> C and so on. 
def convert_prediction(value): 
    predict = '' 
    if value == 6: 
     predict = 'Def' 
    elif value == 5: 
     predict = 'F' 
    elif value == 4: 
     predict = 'E' 
    elif value == 3: 
     predict = 'D' 
    elif value == 2: 
     predict = 'C' 
    elif value == 1: 
     predict = 'B' 
    elif value == 0: 
     predict = 'A' 
    return predict 


def use_neural_network(input_data): 
    prediction = neural_network_model(x) 

    with tf.Session() as sess: 
     sess.run(tf.global_variables_initializer()) 
     feed_list = [(k['index'], k['balance']) for k in input_data] 
     indexes = [k[0] for k in feed_list] 
     predictions = sess.run(tf.argmax(prediction.eval(feed_dict={x: [k[1] for k in feed_list]}), 1)) 
     predictions = np.array([convert_prediction(value) for value in predictions]) 
     result = list(zip(indexes, predictions)) 
     return result 

if __name__ == '__main__': 

    prediction = use_neural_network(data) 

    print('\nCalculating errors...') 

    predictions_dict = {'A': [], 
         'B': [], 
         'C': [], 
         'D': [], 
         'E': [], 
         'F': [], 
         'Def': []} 

    def create_predictions_dict(index, rank): 
      for j in data: 
       if j['index'] == index: 
        return index, j['rank'], rank 

    np = multip.cpu_count() 
    p = multip.Pool(processes=np) 
    predictions_list = p.starmap(create_predictions_dict, prediction[:5000]) 
    p.close() 
    p.join() 

    for elem in predictions_list: 
     predictions_dict[elem[1]].append(elem) 

    def is_correct(x): 
     if x[1] == x[2]: 
      return 1 
     else: 
      return 0 
    correct_guesses = sum(is_correct(x) for x in predictions_list) 
    correct_ratio = correct_guesses/len(data) 

    print('correct:', correct_ratio) 

Either the accuracy calculated with TensorFlow's built-in method is far too high, or the accuracy I calculate by hand is far too low. In general, when I extract the predictions from the data myself, the higher the TF accuracy, the lower the accuracy I measure.
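
For reference, the following is a minimal sketch (my assumption of how a like-for-like check would look; it would have to run inside the training session above, right after the accuracy.eval calls) of cross-checking TensorFlow's accuracy against a hand count over the exact same test set:

# hypothetical cross-check, assumed to run inside the session from train_neural_network
predicted_classes = sess.run(tf.argmax(prediction, 1), feed_dict={x: test_x})  # argmax of the logits, same as 'correct' above
true_classes = np.argmax(np.array(test_y), axis=1)  # argmax of the one-hot labels
hand_accuracy = np.mean(predicted_classes == true_classes)  # fraction of matching predictions
print('Hand-computed test accuracy:', hand_accuracy)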

It is as if I were training the network to guess as wrongly as possible instead of as correctly as possible. However, I do not see where the problem could be. Maybe in the cost function? As suggested in this question, I use:

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y)) 

--- EDIT ---

I fixed the restoring of the variables in the test phase, but I still get a very low accuracy (around 0.1). Here is the updated code:

import tensorflow as tf 
import numpy as np 
import multiprocessing as multip 

# this labels data so that a firm in class A has label [1, 0, 0, 0, 0, 0, 0], a firm in 
# class B [0, 1, 0, 0, 0, 0, 0] and so on 
def calc_label(data): 
    label = [0, 0, 0, 0, 0, 0, 0] 
    if data['rank'] == 'A': 
     label[0] = 1 
    elif data['rank'] == 'B': 
     label[1] = 1 
    elif data['rank'] == 'C': 
     label[2] = 1 
    elif data['rank'] == 'D': 
     label[3] = 1 
    elif data['rank'] == 'E': 
     label[4] = 1 
    elif data['rank'] == 'F': 
     label[5] = 1 
    elif data['rank'] == 'Def': 
     label[6] = 1 
    return label 


data = [ 
    {'index': 123, 
    'balance': [], 
    'probaility': 0.89, 
    'failed': True, 
    'rank': 'A'}, 
    {'index': 50234, 
    'balance': [], 
    'probaility': 0.45, 
    'failed': False, 
    'rank': 'B'}] 


features_and_labels = [[x['balance'], calc_label(x)] for x in data] 
features = [x[0] for x in features_and_labels] 
labels = [x[1] for x in features_and_labels] 

train_size = int(len(labels) * 0.9) 
train_y = labels[:train_size] 
test_y = labels[train_size:] 
train_x = features[:train_size] 
test_x = features[train_size:] 

classes_n = len(labels[0]) 
nodes_per_layer = [100, 100] 
hidden_layers_n = len(nodes_per_layer) 
batch_size = 50000 
epochs = 1000 
print_step = 50 
saving_step = 100 

x = tf.placeholder('float', [None, len(features[0])]) 
y = tf.placeholder('float', [None, classes_n]) 

current_epoch = tf.Variable(1) 

layers = [{'weights': tf.Variable(tf.random_normal([len(features[0]), nodes_per_layer[0]])), 
      'biases': tf.Variable(tf.random_normal([nodes_per_layer[0]]))}] 

for i in range(1, hidden_layers_n): 
    layers.append({'weights': tf.Variable(tf.random_normal([nodes_per_layer[i - 1], nodes_per_layer[i]])), 
        'biases': tf.Variable(tf.random_normal([nodes_per_layer[i]]))}) 

output_layer = {'weights': tf.Variable(tf.random_normal([nodes_per_layer[-1], classes_n])), 
       'biases': tf.Variable(tf.random_normal([classes_n]))} 


def neural_network_model(data): 
    l = [] 

    l.append(tf.add(tf.matmul(x, layers[0]['weights']), layers[0]['biases'])) 
    l[0] = tf.nn.relu(l[0]) 

    for i in range(1, hidden_layers_n): 
     l.append(tf.add(tf.matmul(l[i - 1], layers[i]['weights']), layers[i]['biases'])) 
     l[i] = tf.nn.relu(l[i]) 

    output = tf.add(tf.matmul(l[hidden_layers_n - 1], output_layer['weights']), output_layer['biases']) 

    return output 


saver = tf.train.Saver() 
tf_log = 'tf.log' 


def train_neural_network(x): 
    prediction = neural_network_model(x) 

    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y)) 

    optimizer = tf.train.AdamOptimizer().minimize(cost) 

    with tf.Session() as sess: 
     sess.run(tf.global_variables_initializer()) 
     try: 
      epoch = int(open(tf_log, 'r').read().split('\n')[-2]) + 1 
      print('Starting epoch:', epoch) 
     except: 
      epoch = 1 

     if epoch != 1: 
      saver.restore(sess, "model.ckpt") 

     print('Starting training...') 
     while epoch <= epochs: 
      epoch_loss = 1 
      i = 0 
      while i < len(train_x): 
       start = i 
       end = i + batch_size 
       batch_x = np.array(train_x[start:end]) 
       batch_y = np.array(train_y[start:end]) 
       _, c = sess.run([optimizer, cost], feed_dict={x: batch_x, y: batch_y}) 
       epoch_loss += c 
       i += batch_size 

      if (epoch + 1) % print_step == 0: 
       print('Epoch', epoch + 1, 'out of', 
         '{} completed,'.format(epochs), 'loss:', epoch_loss) 
       correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1)) 
       accuracy = tf.reduce_mean(tf.cast(correct, 'float')) 
       accuracy_number = accuracy.eval({x: test_x, y: test_y}) 
       accuracy_number_training_set = accuracy.eval({x: train_x, y: train_y}) 
       print('Train accuracy:', accuracy_number_training_set) 
       print('Test accuracy:', accuracy_number) 

      if epoch == 1: 
       saver.save(sess, "model.ckpt") 
      if (epoch + 1) % saving_step == 0: 
       saver.save(sess, "model.ckpt") 
       # print('Epoch', epoch, 'completed out of', epochs, 'loss:', epoch_loss) 
       with open(tf_log, 'a') as f: 
        f.write(str(epoch) + '\n') 
      epoch += 1 

train_neural_network(x) 

# this function converts predictions expressed as numbers into the letters corresponding to the different ranking 
# classes, for example 0 -> A, 1 -> B, 2 -> C and so on. 
def convert_prediction(value): 
    predict = '' 
    if value == 6: 
     predict = 'Def' 
    elif value == 5: 
     predict = 'F' 
    elif value == 4: 
     predict = 'E' 
    elif value == 3: 
     predict = 'D' 
    elif value == 2: 
     predict = 'C' 
    elif value == 1: 
     predict = 'B' 
    elif value == 0: 
     predict = 'A' 
    return predict 


def use_neural_network(input_data): 
    prediction = neural_network_model(x) 

    with tf.Session() as sess: 
     for word in ['weights', 'biases']: 
      output_layer[word].initializer.run() 
      for variable in layers: 
       variable[word].initializer.run() 
     saver.restore(sess, "model.ckpt") 
     feed_list = [(k['index'], k['balance']) for k in input_data] 
     indexes = [k[0] for k in feed_list] 
     predictions = sess.run(tf.argmax(prediction.eval(feed_dict={x: [k[1] for k in feed_list]}), 1)) 
     predictions = np.array([convert_prediction(value) for value in predictions]) 
     result = list(zip(indexes, predictions)) 
     return result 

if __name__ == '__main__': 

    prediction = use_neural_network(data) 

    print('\nCalculating errors...') 

    predictions_dict = {'A': [], 
         'B': [], 
         'C': [], 
         'D': [], 
         'E': [], 
         'F': [], 
         'Def': []} 

    def create_predictions_dict(index, rank): 
      for j in data: 
       # checks which predictions are made to which firms and adds them to predictions_dict 
       if j['index'] == index: 
        return index, j['rank'], rank 

    np = multip.cpu_count() 
    p = multip.Pool(processes=np) 
    predictions_list = p.starmap(create_predictions_dict, prediction[:5000]) 
    p.close() 
    p.join() 

    for elem in predictions_list: 
     predictions_dict[elem[1]].append(elem) 

    def is_correct(x): 
     if x[1] == x[2]: 
      return 1 
     else: 
      return 0 
    correct_guesses = sum(is_correct(x) for x in predictions_list) 
    correct_ratio = correct_guesses/len(data) 

    print('correct:', correct_ratio) 

1 Answer

The problem is here:

def use_neural_network(input_data): 
    prediction = neural_network_model(x) 

    with tf.Session() as sess: 
     sess.run(tf.global_variables_initializer()) #<<<<<<<<<<<<<<<<<< 

tf.global_variables_initializer initializes all of the variables in the network, which means it wipes out any training that has been done. Instead, save the network's weights to a checkpoint at the end of training, and then load the trained weights back into the network variables through tf.train.Saver()'s restore().
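
In rough outline, the pattern looks like the sketch below (a sketch only, reusing neural_network_model, x, and the "model.ckpt" checkpoint path from your script; not a drop-in replacement):

saver = tf.train.Saver()  # create after all tf.Variable objects have been defined

# at the end of training, inside the training session:
#     saver.save(sess, "model.ckpt")

def use_neural_network(input_data):
    prediction = neural_network_model(x)
    with tf.Session() as sess:
        # restore the trained weights instead of re-initializing them,
        # so the prediction uses what the network actually learned
        saver.restore(sess, "model.ckpt")
        balances = [k['balance'] for k in input_data]
        return sess.run(tf.argmax(prediction, 1), feed_dict={x: balances})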

The TensorFlow website has an in-depth tutorial on how to save and restore network weights.
