2017-12-13 4 views
2

나는 현재 tensorFlow를 사용하고 있으며, 약간의 튜토리얼을 완성하기는 쉽지만, 우리가 자신의 데이터를 입력하려고 할 때 실제 작업이 시작됩니다.Tensor flow와 RandomShuffleQueue "부족한 요소들 (요청 된 64, 현재 크기 0)"

나는 동물과 배경의 아주 기본적인 데이터 세트 작곡가를 사용했다.

3 개의 tfrecords (train/val/test)를 만들었습니다. 나는 그 다음 그들을 읽고, 간단한 모델을 훈련 시키려고 노력한다 (Alexnet 여기). "FLAGS.num_iter"를 사용하여 반복 범위를 벗어나지 않았는지 확인하려고했습니다.

이 코드 처리로 인해 RandomShuffleQueue "부족한 요소 (요청 된 64, 현재 크기 0)"오류가 발생합니다.

웹을 파고 들었지만 질문에 대한 답변을 찾지 못했습니다. 여기 그들은 다음과 같습니다. 어떻게 해결할 수 있습니까? tfrecord에 실수가 있는지 어떻게 확인할 수 있습니까? 충분한 요소가 있는지 확인하기위한 조건을 작성할 수 있습니까? 내 코드에 대한 추가 질문이 있으면 주위에 머물러 있습니다! I/O의 큐 주자 API를 사용하여

안부는

import tensorflow as tf 
import os.path 
from model import Model 
from alexnet import Alexnet 


FLAGS = tf.app.flags.FLAGS 
NUM_LABELS = 2 

IMAGE_WIDTH = 64 
IMAGE_HEIGHT = 64 
NUMBER_OF_CHANNELS = 3 
#SOURCE_DIR = './data/' 
#TRAINING_IMAGES_DIR = SOURCE_DIR + 'train/' 
#LIST_FILE_NAME = 'list.txt' 
BATCH_SIZE = 2 
#TRAINING_SET_SIZE = 81112 
TRAIN_FILE = '/home/sebv/SebV/datas/tfRecording/train.tfrecords' 
VAL_FILE = '/home/sebv/SebV/datas/tfRecording/val.tfrecor' 

def read_and_decode(filename_queue): 
    reader = tf.TFRecordReader() 
    _, serialized_example = reader.read(filename_queue) 
    features = tf.parse_single_example(
     serialized_example, 
     # Defaults are not specified since both keys are required. 
     features={ 
      'image/encoded': tf.FixedLenFeature([], tf.string), 
      'image/format': tf.FixedLenFeature([], tf.string), 
      'image/class/label': tf.FixedLenFeature([], tf.int64), 
      'image/height': tf.FixedLenFeature([], tf.int64), 
      'image/width': tf.FixedLenFeature([], tf.int64), 
     }) 

    # Convert from a scalar string tensor (whose single string has 
    # length mnist.IMAGE_PIXELS) to a uint8 tensor with shape 
    # [mnist.IMAGE_PIXELS]. 
    image = tf.image.decode_png(features['image/encoded'], 3, tf.uint8) 

    # OPTIONAL: Could reshape into a 28x28 image and apply distortions 
    # here. Since we are not applying any distortions in this 
    # example, and the next step expects the image to be flattened 
    # into a vector, we don't bother. 

    # Convert from [0, 255] -> [-0.5, 0.5] floats. 
    image = tf.cast(image, tf.float32)# * (1./255) - 0.5 
    image = tf.reshape(image, [IMAGE_WIDTH,IMAGE_HEIGHT,NUMBER_OF_CHANNELS]) 
    # Convert label from a scalar uint8 tensor to an int32 scalar. 
    label = tf.cast(features['image/class/label'], tf.int64) 

    return image, label 


def inputs(train, filen, batch_size, num_epochs): 
    """Reads input data num_epochs times. 
    Args: 
    train: Selects between the training (True) and validation (False) data. 
    batch_size: Number of examples per returned batch. 
    num_epochs: Number of times to read the input data, or 0/None to 
    train forever. 
    Returns: 
    A tuple (images, labels), where: 
    * images is a float tensor with shape [batch_size, mnist.IMAGE_PIXELS] 
    in the range [-0.5, 0.5]. 
    * labels is an int32 tensor with shape [batch_size] with the true label, 
    a number in the range [0, mnist.NUM_CLASSES). 
    Note that an tf.train.QueueRunner is added to the graph, which 
    must be run using e.g. tf.train.start_queue_runners(). 
    """ 
    if not num_epochs: num_epochs = None 
    filename = filen 
    filename_queue = tf.train.string_input_producer([filename], num_epochs=num_epochs) 

    # Even when reading in multiple threads, share the filename 
    # queue. 
    image, label = read_and_decode(filename_queue) 
    # Shuffle the examples and collect them into batch_size batches. 
    # (Internally uses a RandomShuffleQueue.) 
    # We run this in two threads to avoid being a bottleneck. 
    images, sparse_labels = tf.train.shuffle_batch([image, label], batch_size=batch_size, num_threads=2,capacity=20000 + 3 * batch_size,min_after_dequeue=20000) 
    sparse_labels = tf.reshape(sparse_labels, [batch_size]) 
    return images, sparse_labels 


def train(): 
    model = Alexnet() 
    with tf.Graph().as_default(): 

     x = tf.placeholder(tf.float32, [None, IMAGE_WIDTH,IMAGE_HEIGHT,NUMBER_OF_CHANNELS], name='x-input') 
     y = tf.placeholder(tf.float32, [None], name='y-input') 

     images, labels = inputs(train=True, filen=TRAIN_FILE, batch_size=FLAGS.batch_size,num_epochs=FLAGS.num_iter) 

     images_val, labels_val = inputs(train=False, filen=VAL_FILE, batch_size=FLAGS.batch_size,num_epochs=1) 

     keep_prob = tf.placeholder(tf.float32, name='dropout_prob') 
     global_step = tf.contrib.framework.get_or_create_global_step() 

     logits = model.inference(images, keep_prob=keep_prob) 
     loss = model.loss(logits=logits, labels=labels) 

     accuracy = model.accuracy(logits, labels) 
     summary_op = tf.summary.merge_all() 
     train_op = model.train(loss, global_step=global_step) 

     saver = tf.train.Saver() 

     with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess: 
      writer = tf.summary.FileWriter(FLAGS.summary_dir, sess.graph) 
      sess.run(tf.global_variables_initializer()) 
      sess.run(tf.local_variables_initializer()) 
      coord = tf.train.Coordinator() 
      threads = tf.train.start_queue_runners(sess=sess, coord=coord) 
      for i in xrange(FLAGS.num_iter): 
       _, cur_loss, summary = sess.run([train_op, loss, summary_op], 
               feed_dict={keep_prob: 0.5}) 
       writer.add_summary(summary, i) 

       if i % 10 == 0: 

        batch_x = sess.run(images_val) 
        batch_y = sess.run(labels_val) 
        validation_accuracy = accuracy.eval(feed_dict={x: batch_x, y: batch_y, keep_prob: 1.0}) 
        print('Iter {} Accuracy: {}'.format(i, validation_accuracy)) 
        saver.save(sess, FLAGS.checkpoint_file_path, global_step) 
       if i == FLAGS.num_iter: 
        coord.request_stop() 
        coord.join(threads) 



def main(argv=None): 
    train() 


if __name__ == '__main__': 
    tf.app.flags.DEFINE_integer('batch_size', 64, 'size of training batches') 
    tf.app.flags.DEFINE_integer('num_iter', 4001, 'number of training iterations') #10000 
    tf.app.flags.DEFINE_string('checkpoint_file_path', 'checkpoints/model.ckpt-10000', 'path to checkpoint file') 
    tf.app.flags.DEFINE_string('train_data', 'data', 'path to train and test data') 
    tf.app.flags.DEFINE_string('summary_dir', 'graphs', 'path to directory for storing summaries') 

    tf.app.run() 

답변

0

나는 실제로 왜이 오류가 발생하는지 알아 냈습니다. 사실, 내가 보낸 이미지 중 일부는 64 * 64 크기가 아닙니다. 그러므로 그들은 1 64 64 3으로 모양을 바꿀 수 없었습니다. 왜 여기에 오류가 없었는지 나는 알지 못합니다. 그리고 나는 단지 뒤섞인 이미지에서만 그것을 얻습니다.

이미지를 변경하기 전에 크기를 조정 했으므로 이제는 좋습니다!

또한 jsimsa에게 감사드립니다.

2

되지 않습니다. 대신 tf.data API를 사용하는 것이 좋습니다.

def input_fn(params): 
    """Passes data to the estimator as required.""" 

    batch_size = params["batch_size"] 

    def parser(serialized_example): 
    """Parses a single tf.Example into a 224x224 image and label tensors.""" 

    final_image = None 
    final_label = None 
    if FLAGS.preprocessed: 
     features = tf.parse_single_example(
      serialized_example, 
      features={ 
       "image": tf.FixedLenFeature([], tf.string), 
       "label": tf.FixedLenFeature([], tf.int64), 
      }) 
     image = tf.decode_raw(features["image"], tf.float32) 
     image.set_shape([224 * 224 * 3]) 
     final_label = tf.cast(features["label"], tf.int32) 
    else: 
     features = tf.parse_single_example(
      serialized_example, 
      features={ 
       "image/encoded": tf.FixedLenFeature([], tf.string), 
       "image/class/label": tf.FixedLenFeature([], tf.int64), 
      }) 
     image = tf.image.decode_jpeg(features["image/encoded"], channels=3) 
     image = tf.image.resize_images(
      image, 
      size=[224, 224]) 
     final_label = tf.cast(features["image/class/label"], tf.int32) 

    final_image = (tf.cast(image, tf.float32) * (1./255)) - 0.5 

    return final_image, final_label 

    file_pattern = os.path.join(FLAGS.data_dir, "train-*") 
    dataset = tf.data.Dataset.list_files(file_pattern) 

    if FLAGS.filename_shuffle_buffer_size > 0: 
    dataset = dataset.shuffle(buffer_size=FLAGS.filename_shuffle_buffer_size) 
    dataset = dataset.repeat() 

    def prefetch_map_fn(filename): 
    dataset = tf.data.TFRecordDataset(
     filename, buffer_size=FLAGS.dataset_reader_buffer_size) 
    if FLAGS.prefetch_size is None: 
     dataset = dataset.prefetch(batch_size) 
    else: 
     if FLAGS.prefetch_size > 0: 
     dataset = dataset.prefetch(FLAGS.prefetch_size) 
    return dataset 

    if FLAGS.use_sloppy_interleave: 
    dataset = dataset.apply(
     tf.contrib.data.sloppy_interleave(
      prefetch_map_fn, cycle_length=FLAGS.cycle_length)) 
    else: 
    dataset = dataset.interleave(
     prefetch_map_fn, cycle_length=FLAGS.cycle_length) 

    if FLAGS.element_shuffle_buffer_size > 0: 
    dataset = dataset.shuffle(buffer_size=FLAGS.element_shuffle_buffer_size) 

    dataset = dataset.map(
     parser, 
     num_parallel_calls=FLAGS.num_parallel_calls).prefetch(batch_size) 

    dataset = dataset.batch(batch_size)  
    dataset = dataset.prefetch(1) 
    images, labels = dataset.make_one_shot_iterator().get_next() 
    return (
     tf.reshape(images, [batch_size, 224, 224, 3]), 
     tf.reshape(labels, [batch_size]) 
) 

당신이 programmer's guidetf.dataAPI에 대해 자세히 알아볼 수 있습니다 : 여기 Estimator와 함께 사용할 수 있습니다 AlexNet 데이터에 대한 입력 기능에 대한 자세한 예입니다.

관련 문제