2017-10-25 1 views
0

저는 한동안 프로젝트를 진행해 왔는데, 우리가 가진 데이터가 매우 적습니다. 훨씬 더 큰 데이터 세트를 만들 수 있다면 결과가 훨씬 좋아질 것이라는 점은 알고 있습니다. 그 점은 제쳐 두고, 지금 제 문제는 문장을 입력했을 때 출력이 아래와 같이 나온다는 것입니다. (질문 제목: Seq2Seq 네트워크가 출력에서 단어를 계속 반복할 때는 어떻게 해야 합니까?)

contactid contactid contactid contactid

하나의 단어에만 집중되어 그 단어가 계속해서 반복됩니다. 이 장애물을 극복하려면 무엇을 할 수 있습니까?

제가 시도해 본 것들:

  1. 시작/종료 토큰을 제대로 덧붙이고 있는지, 그리고 그 토큰들이 각 vocab 파일의 맨 위에 올바르게 들어 있는지 다시 확인했습니다. 저는 vocab을 공유하고 있습니다.
  2. 품질이 낮은 단어 임베딩 때문에 이런 문제가 생길 수 있다는 글을 찾았습니다. 그래서 tensorboard로 확인해 보니, 아니나 다를까 PCA 결과가 매우 조밀하게 뭉친 점 집합으로 나타났습니다. 이를 보고 페이스북의 사전 훈련된 단어 벡터를 가져와 임베딩으로 로드했습니다. 다시 학습시켰더니 이번에는 tensorboard PCA가 훨씬 나은 그림을 보여 주었습니다.
  3. 학습 스케줄러를 기본 방식에서 SampledScheduling으로 바꾸어, 때때로 학습 출력을 정답(ground truth)으로 대체하도록 했습니다.
  4. 디코더를 빔 서치 디코더로 바꿨습니다. 단어 후보들이 중간 특징 공간에서 서로 가까이 있는 경우 더 견고한 응답을 줄 수 있을 것이라고 생각했습니다.

퍼플렉서티(perplexity)는 확실히 꾸준히 줄어들고 있습니다. 다음은 제 데이터 세트 준비 코드입니다:

class ModelInputs(object):
    """Factory that builds ``(input_fn, hook)`` pairs for train, eval and predict modes.

    Each ``input_fn`` builds a text-line dataset pipeline when it is called:
    split each line on whitespace, map words to ids through a vocab table,
    and pad-batch the result. The file names are fed through placeholders
    when the returned hook's ``iterator_initializer_func`` runs the
    iterator initializer.
    """

    def __init__(
        self, vocab_files, batch_size,
        share_vocab=True, src_eos_id=1, tgt_eos_id=2
    ):
        """Store the pipeline configuration.

        Args:
            vocab_files: Indexable ``(src_vocab_file, tgt_vocab_file)``; the
                second entry is only read when ``share_vocab`` is false.
            batch_size: Number of examples per padded batch.
            share_vocab: When true, the source vocab table is also used for
                the target side.
            src_eos_id: Id used to pad source sequences.
            tgt_eos_id: Id used to pad target sequences.
        """
        self.batch_size = batch_size
        self.vocab_files = vocab_files
        self.share_vocab = share_vocab
        self.src_eos_id = src_eos_id
        self.tgt_eos_id = tgt_eos_id

    def get_inputs(self, file_path, num_infer=None, mode=tf.estimator.ModeKeys.TRAIN):
        """Return the ``(input_fn, hook)`` pair for ``mode``.

        Args:
            file_path: For TRAIN/EVAL an indexable ``(source_file, target_file)``;
                for PREDICT a single file path.
            num_infer: Must be supplied in PREDICT mode.
            mode: One of the ``tf.estimator.ModeKeys`` values.

        Raises:
            ValueError: In PREDICT mode when ``num_infer`` is None, or when
                ``mode`` is not TRAIN, EVAL or PREDICT.
        """
        self.mode = mode
        if mode == tf.estimator.ModeKeys.TRAIN:
            return self._training_input_hook(file_path)
        if mode == tf.estimator.ModeKeys.EVAL:
            return self._validation_input_hook(file_path)
        if mode == tf.estimator.ModeKeys.PREDICT:
            if num_infer is None:
                raise ValueError('If performing inference must supply number of predictions to be made.')
            return self._infer_input_hook(file_path, num_infer)
        # Previously an unknown mode fell through and returned None, which
        # only failed later when the caller tried to unpack the result.
        raise ValueError('Unsupported mode: {!r}'.format(mode))

    def _prepare_data(self, dataset, out=False):
        """Turn a dataset of text lines into ``(token_ids, length)`` pairs.

        Args:
            dataset: Dataset whose elements are scalar strings.
            out: When true, look words up in the target vocab table
                (``self.vocab_tables[1]``) instead of the source table.
        """
        table = self.vocab_tables[1] if out else self.vocab_tables[0]
        prep_set = dataset.map(lambda string: tf.string_split([string]).values)
        prep_set = prep_set.map(lambda words: (words, tf.size(words)))
        return prep_set.map(lambda words, size: (table.lookup(words), size))

    def _batch_data(self, dataset, src_eos_id, tgt_eos_id):
        """Pad-batch zipped ``((src_ids, src_len), (tgt_ids, tgt_len))`` examples.

        Id sequences are padded to the longest one in the batch using the
        respective EOS id; the length components are scalars, so their
        padding value of 0 is never applied.
        """
        sequence_and_length = (tf.TensorShape([None]), tf.TensorShape([]))
        return dataset.padded_batch(
            self.batch_size,
            padded_shapes=(sequence_and_length, sequence_and_length),
            padding_values=((src_eos_id, 0), (tgt_eos_id, 0))
        )

    def _batch_infer_data(self, dataset, src_eos_id):
        """Pad-batch source-only ``(src_ids, src_len)`` examples for inference."""
        return dataset.padded_batch(
            self.batch_size,
            padded_shapes=(tf.TensorShape([None]), tf.TensorShape([])),
            padding_values=(src_eos_id, 0)
        )

    def _create_vocab_tables(self, vocab_files, share_vocab=False):
        """Build the source and target word-to-id lookup tables.

        Unknown words map to ``UNK_ID``. With ``share_vocab`` the same table
        object is returned for both sides.

        Raises:
            ValueError: If ``share_vocab`` is false and no target vocab file
                was provided.
        """
        # Test share_vocab first so a one-element vocab_files is accepted
        # when the vocabulary is shared.
        if not share_vocab and (len(vocab_files) < 2 or vocab_files[1] is None):
            raise ValueError(
                'If share_vocab is set to false must provide target vocab. '
                '(src_vocab_file, target_vocab_file)'
            )

        src_vocab_table = lookup_ops.index_table_from_file(
            vocab_files[0],
            default_value=UNK_ID
        )

        if share_vocab:
            tgt_vocab_table = src_vocab_table
        else:
            tgt_vocab_table = lookup_ops.index_table_from_file(
                vocab_files[1],
                default_value=UNK_ID
            )

        return src_vocab_table, tgt_vocab_table

    def _prepare_iterator_hook(self, hook, scope_name, iterator, file_path, name_placeholder):
        """Attach an iterator-initializing callback to ``hook``.

        Args:
            hook: Object whose ``iterator_initializer_func`` attribute is set.
            scope_name: Unused; kept so existing call sites keep working.
            iterator: Initializable iterator whose initializer is run.
            file_path: A ``(source, target)`` pair when ``name_placeholder``
                is a pair, otherwise a single path.
            name_placeholder: Either an ``(in_file, out_file)`` placeholder
                pair or a single placeholder.
        """
        # Decide the feed layout from the placeholders actually passed in.
        # self.mode is overwritten by every get_inputs() call, while this
        # method only runs later, when the input_fn is invoked.
        if isinstance(name_placeholder, (tuple, list)):
            feed_dict = {
                name_placeholder[0]: file_path[0],
                name_placeholder[1]: file_path[1]
            }
        else:
            feed_dict = {name_placeholder: file_path}

        hook.iterator_initializer_func = \
            lambda sess: sess.run(
                iterator.initializer,
                feed_dict=feed_dict,
            )

    def _set_up_train_or_eval(self, scope_name, file_path):
        """Build the ``(input_fn, hook)`` pair shared by training and evaluation.

        Args:
            scope_name: Name scope under which the input ops are created.
            file_path: Indexable ``(source_file, target_file)``.
        """
        hook = IteratorInitializerHook()

        def input_fn():
            with tf.name_scope(scope_name):
                with tf.name_scope('sentence_markers'):
                    src_eos_id = tf.constant(self.src_eos_id, dtype=tf.int64)
                    tgt_eos_id = tf.constant(self.tgt_eos_id, dtype=tf.int64)
                self.vocab_tables = self._create_vocab_tables(self.vocab_files, self.share_vocab)
                # NOTE(review): repeat(None) repeats indefinitely, so an
                # evaluation run presumably needs an explicit step limit.
                in_file = tf.placeholder(tf.string, shape=())
                in_dataset = self._prepare_data(tf.contrib.data.TextLineDataset(in_file).repeat(None))
                out_file = tf.placeholder(tf.string, shape=())
                # out=True routes the target side through the target vocab
                # table; with a shared vocab both tables are the same object.
                out_dataset = self._prepare_data(
                    tf.contrib.data.TextLineDataset(out_file).repeat(None),
                    out=True
                )
                dataset = tf.contrib.data.Dataset.zip((in_dataset, out_dataset))
                dataset = self._batch_data(dataset, src_eos_id, tgt_eos_id)
                iterator = dataset.make_initializable_iterator()
                next_example, next_label = iterator.get_next()
                self._prepare_iterator_hook(hook, scope_name, iterator, file_path, (in_file, out_file))
                return next_example, next_label

        return (input_fn, hook)

    def _training_input_hook(self, file_path):
        """Return the ``(input_fn, hook)`` pair built under the 'train_inputs' scope."""
        return self._set_up_train_or_eval('train_inputs', file_path)

    def _validation_input_hook(self, file_path):
        """Return the ``(input_fn, hook)`` pair built under the 'eval_inputs' scope."""
        return self._set_up_train_or_eval('eval_inputs', file_path)

    def _infer_input_hook(self, file_path, num_infer):
        """Build the ``(input_fn, hook)`` pair for inference.

        The returned ``input_fn`` yields ``((token_ids, seq_len), None)``;
        there are no labels at inference time. ``num_infer`` is accepted but
        not used by this method.
        """
        hook = IteratorInitializerHook()

        def input_fn():
            with tf.name_scope('infer_inputs'):
                with tf.name_scope('sentence_markers'):
                    src_eos_id = tf.constant(self.src_eos_id, dtype=tf.int64)
                self.vocab_tables = self._create_vocab_tables(self.vocab_files, self.share_vocab)
                infer_file = tf.placeholder(tf.string, shape=())
                dataset = tf.contrib.data.TextLineDataset(infer_file)
                dataset = self._prepare_data(dataset)
                dataset = self._batch_infer_data(dataset, src_eos_id)
                iterator = dataset.make_initializable_iterator()
                next_example, seq_len = iterator.get_next()
                self._prepare_iterator_hook(hook, 'infer_inputs', iterator, file_path, infer_file)
                return ((next_example, seq_len), None)

        return (input_fn, hook)

그리고 다음은 제 모델입니다:

class Seq2Seq():
    """Sequence-to-sequence model with a bidirectional LSTM encoder and an LSTM decoder.

    The constructor sets up the embeddings; ``encode`` and ``decode`` build
    the recurrent parts, and ``prepare_predict`` / ``prepare_train_eval``
    wrap the results in a ``tf.estimator.EstimatorSpec``.
    """

    def __init__(
        self, batch_size, inputs,
        outputs, inp_vocab_size, tgt_vocab_size,
        embed_dim, mode, time_major=False,
        enc_embedding=None, dec_embedding=None, average_across_batch=True,
        average_across_timesteps=True, vocab_path=None, embedding_path='./data_files/wiki.simple.vec'
    ):
        """Create the encoder/decoder embeddings and store the configuration.

        Args:
            batch_size: Batch size used to build the beam-search start tokens.
            inputs: Source token ids to embed.
            outputs: Target token ids to embed (used in TRAIN/EVAL only).
            inp_vocab_size: Source vocabulary size.
            tgt_vocab_size: Target vocabulary size.
            embed_dim: Embedding dimension.
            mode: One of the ``tf.estimator.ModeKeys`` values.
            time_major: Forwarded to ``tf.nn.bidirectional_dynamic_rnn``.
            enc_embedding: Optional pre-built embedded source inputs; when
                given, the pretrained vectors are not loaded.
            dec_embedding: Optional decoder embedding. When it is given in
                TRAIN/EVAL, ``self.embed_outputs`` is not created, so
                ``decode`` must then be called with an explicit ``helper``.
            average_across_batch: Forwarded to ``sequence_loss``.
            average_across_timesteps: Forwarded to ``sequence_loss``.
            vocab_path: Vocab file used to map ids back to words; required
                in PREDICT mode.
            embedding_path: word2vec-format file of pretrained vectors.

        Raises:
            ValueError: If ``mode`` is PREDICT and ``vocab_path`` is None.
        """
        # Validate before doing any graph construction or file loading.
        if mode == tf.estimator.ModeKeys.PREDICT and vocab_path is None:
            raise ValueError('If mode is predict, must supply vocab_path')

        # `is None` rather than truthiness: taking the truth value of a
        # tf.Tensor raises TypeError in graph mode.
        if enc_embedding is None:
            # NOTE(review): embed_np presumably has to be shaped
            # (inp_vocab_size, embed_dim) for the initializer - confirm.
            embed_np = self._get_embedding(embedding_path)
            self.enc_embedding = tf.contrib.layers.embed_sequence(
                inputs,
                inp_vocab_size,
                embed_dim,
                trainable=True,
                scope='embed',
                initializer=tf.constant_initializer(value=embed_np, dtype=tf.float32)
            )
        else:
            self.enc_embedding = enc_embedding

        if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
            if dec_embedding is None:
                # Reuse the 'embed' scope so encoder and decoder share one
                # embedding matrix.
                embed_outputs = tf.contrib.layers.embed_sequence(
                    outputs,
                    tgt_vocab_size,
                    embed_dim,
                    trainable=True,
                    scope='embed',
                    reuse=True
                )
                with tf.variable_scope('embed', reuse=True):
                    dec_embedding = tf.get_variable('embeddings')
                self.embed_outputs = embed_outputs
            self.dec_embedding = dec_embedding
        else:
            # Any other mode ignores the dec_embedding argument and reads
            # the shared matrix back from the 'embed' scope.
            with tf.variable_scope('embed', reuse=True):
                self.dec_embedding = tf.get_variable('embeddings')

        self.vocab_path = vocab_path
        self.inp_vocab_size = inp_vocab_size
        self.tgt_vocab_size = tgt_vocab_size
        self.average_across_batch = average_across_batch
        self.average_across_timesteps = average_across_timesteps
        self.time_major = time_major
        self.batch_size = batch_size
        self.mode = mode

    def _get_embedding(self, embedding_path):
        """Load word2vec-format vectors and return them as one array.

        Rows follow the iteration order of ``model.vocab``.
        NOTE(review): the vocab file presumably has to list words in the
        same order for row ``i`` to belong to token id ``i`` - verify.
        """
        model = KeyedVectors.load_word2vec_format(embedding_path)
        return np.array([model.word_vec(word) for word in model.vocab.keys()])

    def _get_lstm(self, num_units):
        """Return a ``BasicLSTMCell`` with ``num_units`` units."""
        return tf.nn.rnn_cell.BasicLSTMCell(num_units)

    def encode(self, num_units, num_layers, seq_len, cell_fw=None, cell_bw=None):
        """Run the bidirectional encoder over ``self.enc_embedding``.

        Args:
            num_units: Units per direction for the default cells.
            num_layers: Accepted but not used by this method.
            seq_len: Per-example source sequence lengths.
            cell_fw: Optional forward cell; used only together with ``cell_bw``.
            cell_bw: Optional backward cell; used only together with ``cell_fw``.

        Returns:
            ``(encoder_outputs, encoder_state)``: the forward and backward
            outputs concatenated on the last axis, and an ``LSTMStateTuple``
            whose ``c`` and ``h`` are the two directions' states
            concatenated on axis 1.
        """
        if cell_fw is not None and cell_bw is not None:
            fw_cell, bw_cell = cell_fw, cell_bw
        else:
            fw_cell = self._get_lstm(num_units)
            bw_cell = self._get_lstm(num_units)
        encoder_outputs, (fw_state, bw_state) = tf.nn.bidirectional_dynamic_rnn(
            fw_cell,
            bw_cell,
            self.enc_embedding,
            sequence_length=seq_len,
            time_major=self.time_major,
            dtype=tf.float32
        )
        c_state = tf.concat([fw_state.c, bw_state.c], axis=1)
        h_state = tf.concat([fw_state.h, bw_state.h], axis=1)
        encoder_state = tf.contrib.rnn.LSTMStateTuple(c=c_state, h=h_state)
        return tf.concat(encoder_outputs, -1), encoder_state

    def _train_decoder(self, decoder_cell, out_seq_len, encoder_state, helper):
        """Build the ``BasicDecoder`` used for training and evaluation.

        When ``helper`` is None, a ``ScheduledEmbeddingTrainingHelper`` with
        sampling probability 0.3 is created from ``self.embed_outputs`` and
        ``self.dec_embedding``.
        """
        if helper is None:
            helper = tf.contrib.seq2seq.ScheduledEmbeddingTrainingHelper(
                self.embed_outputs,
                out_seq_len,
                self.dec_embedding,
                0.3,
            )
        projection_layer = layers_core.Dense(self.tgt_vocab_size, use_bias=False)
        return tf.contrib.seq2seq.BasicDecoder(
            decoder_cell,
            helper,
            encoder_state,
            output_layer=projection_layer
        )

    def _predict_decoder(self, cell, encoder_state, beam_width, length_penalty_weight):
        """Build the ``BeamSearchDecoder`` used for inference.

        Decoding starts from token id 1 and stops at token id 2; the
        encoder state is tiled ``beam_width`` times.
        """
        if length_penalty_weight is None:
            # 0.0 disables the length penalty; None is not a valid weight.
            length_penalty_weight = 0.0
        tiled_encoder_state = tf.contrib.seq2seq.tile_batch(
            encoder_state, multiplier=beam_width
        )
        with tf.name_scope('sentence_markers'):
            sos_id = tf.constant(1, dtype=tf.int32)
            eos_id = tf.constant(2, dtype=tf.int32)
        # NOTE(review): uses the configured batch size, so a smaller final
        # batch at inference time would presumably not match - confirm.
        start_tokens = tf.fill([self.batch_size], sos_id)
        projection_layer = layers_core.Dense(self.tgt_vocab_size, use_bias=False)
        return tf.contrib.seq2seq.BeamSearchDecoder(
            cell=cell,
            embedding=self.dec_embedding,
            start_tokens=start_tokens,
            end_token=eos_id,
            initial_state=tiled_encoder_state,
            beam_width=beam_width,
            output_layer=projection_layer,
            length_penalty_weight=length_penalty_weight
        )

    def decode(
        self, num_units, out_seq_len,
        encoder_state, cell=None, helper=None,
        beam_width=None, length_penalty_weight=None,
        maximum_iterations=20
    ):
        """Decode from ``encoder_state`` with the decoder matching ``self.mode``.

        Args:
            num_units: Encoder units per direction; the default decoder cell
                uses ``2 * num_units``.
            out_seq_len: Target sequence lengths (TRAIN/EVAL only).
            encoder_state: Initial decoder state.
            cell: Optional decoder cell replacing the default ``BasicLSTMCell``.
            helper: Optional training helper (TRAIN/EVAL only).
            beam_width: Beam width (PREDICT only).
            length_penalty_weight: Beam-search length penalty (PREDICT only).
            maximum_iterations: Upper bound on decoding steps. In TRAIN/EVAL
                the logits are cut off at this many steps, so it presumably
                has to cover the longest target sequence - confirm.

        Returns:
            ``(rnn_output, sample_id)`` in TRAIN/EVAL, or
            ``(beam_search_decoder_output, predicted_ids)`` in PREDICT.
        """
        predicting = self.mode == tf.estimator.ModeKeys.PREDICT
        with tf.name_scope('Decode'):
            if cell is not None:
                decoder_cell = cell
            else:
                decoder_cell = tf.nn.rnn_cell.BasicLSTMCell(2 * num_units)
            if predicting:
                decoder = self._predict_decoder(decoder_cell, encoder_state, beam_width, length_penalty_weight)
            else:
                decoder = self._train_decoder(decoder_cell, out_seq_len, encoder_state, helper)
            # dynamic_decode returns a tuple; only the outputs are needed.
            final_outputs = tf.contrib.seq2seq.dynamic_decode(
                decoder,
                maximum_iterations=maximum_iterations,
                swap_memory=True,
            )[0]
            if predicting:
                return final_outputs.beam_search_decoder_output, final_outputs.predicted_ids
            return final_outputs.rnn_output, final_outputs.sample_id

    def prepare_predict(self, sample_id):
        """Map predicted ids back to words and wrap them in a PREDICT ``EstimatorSpec``.

        Ids with no entry in ``self.vocab_path`` map to ``UNK``.
        """
        rev_table = lookup_ops.index_to_string_table_from_file(
            self.vocab_path, default_value=UNK)
        predictions = rev_table.lookup(tf.to_int64(sample_id))
        return tf.estimator.EstimatorSpec(
            predictions=predictions,
            mode=tf.estimator.ModeKeys.PREDICT
        )

    def prepare_train_eval(
        self, t_out,
        out_seq_len, labels, lr,
        train_op=None, loss=None
    ):
        """Build the loss and train op and wrap them in an ``EstimatorSpec``.

        Args:
            t_out: Decoder logits.
            out_seq_len: Target lengths, used to mask padded steps out of
                the loss.
            labels: Target token ids.
            lr: Learning rate for the default SGD optimizer.
            train_op: Optional pre-built train op.
            loss: Optional pre-built loss.
        """
        # `is None` rather than truthiness: taking the truth value of a
        # tf.Tensor raises TypeError in graph mode.
        if loss is None:
            weights = tf.sequence_mask(
                out_seq_len,
                dtype=t_out.dtype
            )
            loss = tf.contrib.seq2seq.sequence_loss(
                t_out,
                labels,
                weights,
                average_across_timesteps=self.average_across_timesteps,
                average_across_batch=self.average_across_batch,
            )

        if train_op is None:
            train_op = tf.contrib.layers.optimize_loss(
                loss,
                tf.train.get_global_step(),
                optimizer='SGD',
                learning_rate=lr,
                summaries=['loss', 'learning_rate']
            )

        return tf.estimator.EstimatorSpec(
            mode=self.mode,
            loss=loss,
            train_op=train_op,
        )

답변

1

작은 데이터로 학습하고 있다면, 매개변수의 수, 즉 각 층의 뉴런 수를 줄여 보십시오.

제 경우에는 네트워크가 항상 한 단어만 출력할 때 학습률(learning rate)을 훨씬 작게 낮추는 것이 도움이 되었습니다.

+0

이것은 전체 네트워크에 긍정적 인 영향을 미쳤습니다. – iantbutler

관련 문제