0
저는 잠시 동안 프로젝트를 진행해 왔으며, 우리는 이 매우 작습니다. 데이터가 아주 적습니다. 훨씬 더 큰 데이터 세트를 만들 수 있다면 훨씬 더 좋아질 것입니다. 즉, 옆으로, 순간 내 문제는 내가 문장 입력이있을 때, 내 출력이 지금 당장 같이이다 :Seq2Seq 네트워크가 단어를 반복해서 출력 할 때해야 할 일은 무엇입니까?
contactId의 contactId의 contactId의 contactId를 하나의 단어에 초점을 맞추고 이상 반복
및 다시. 이 장애물을 극복하기 위해 무엇을 할 수 있습니까?
상황은 나는 시도했다 :
- 더블 내가, 내가 Vocab의 공유를하고 시작/토큰을 중지하고 토큰이 제대로 자신의 Vocab의 파일의 상단에 넣고 확인 추가 된 검사.
- 단어 삽입 문제로 인해 발생할 수 있다는 것을 발견했습니다. 그 결과 나는 tensorboard로 확인해 보았고 충분한 PCA가 매우 조밀 한 점 집합을 보였다. 페이스 북의 사전 훈련 된 단어 벡터를 잡아서 임베디드로로드했다는 것을 알았습니다. 다시 훈련을 받았고 이번에는 tensorboard PCA가 훨씬 더 나은 그림을 보여주었습니다.
- 기본에서 SampledScheduling으로 나의 훈련 스케줄러를 전환하여 때때로 훈련 결과를 지상 진실로 바꿉니다.
- 빔 디코더를 사용하도록 내 디코더를 전환했습니다. 단어 선택이 중개 기능 영역에서 근접해있는 경우 더 강력한 응답을 줄 수 있다고 생각했습니다.
내 혼란이 꾸준히 줄어들고 있습니다. 여기
class ModelInputs(object):
"""Factory to construct various input hooks and functions depending on mode """
def __init__(
self, vocab_files, batch_size,
share_vocab=True, src_eos_id=1, tgt_eos_id=2
):
self.batch_size = batch_size
self.vocab_files = vocab_files
self.share_vocab = share_vocab
self.src_eos_id = src_eos_id
self.tgt_eos_id = tgt_eos_id
def get_inputs(self, file_path, num_infer=None, mode=tf.estimator.ModeKeys.TRAIN):
self.mode = mode
if self.mode == tf.estimator.ModeKeys.TRAIN:
return self._training_input_hook(file_path)
if self.mode == tf.estimator.ModeKeys.EVAL:
return self._validation_input_hook(file_path)
if self.mode == tf.estimator.ModeKeys.PREDICT:
if num_infer is None:
raise ValueError('If performing inference must supply number of predictions to be made.')
return self._infer_input_hook(file_path, num_infer)
def _prepare_data(self, dataset, out=False):
prep_set = dataset.map(lambda string: tf.string_split([string]).values)
prep_set = prep_set.map(lambda words: (words, tf.size(words)))
if out == True:
return prep_set.map(lambda words, size: (self.vocab_tables[1].lookup(words), size))
return prep_set.map(lambda words, size: (self.vocab_tables[0].lookup(words), size))
def _batch_data(self, dataset, src_eos_id, tgt_eos_id):
batched_set = dataset.padded_batch(
self.batch_size,
padded_shapes=((tf.TensorShape([None]), tf.TensorShape([])), (tf.TensorShape([None]), tf.TensorShape([]))),
padding_values=((src_eos_id, 0), (tgt_eos_id, 0))
)
return batched_set
def _batch_infer_data(self, dataset, src_eos_id):
batched_set = dataset.padded_batch(
self.batch_size,
padded_shapes=(tf.TensorShape([None]), tf.TensorShape([])),
padding_values=(src_eos_id, 0)
)
return batched_set
def _create_vocab_tables(self, vocab_files, share_vocab=False):
if vocab_files[1] is None and share_vocab == False:
raise ValueError('If share_vocab is set to false must provide target vocab. (src_vocab_file, \
target_vocab_file)')
src_vocab_table = lookup_ops.index_table_from_file(
vocab_files[0],
default_value=UNK_ID
)
if share_vocab:
tgt_vocab_table = src_vocab_table
else:
tgt_vocab_table = lookup_ops.index_table_from_file(
vocab_files[1],
default_value=UNK_ID
)
return src_vocab_table, tgt_vocab_table
def _prepare_iterator_hook(self, hook, scope_name, iterator, file_path, name_placeholder):
if self.mode == tf.estimator.ModeKeys.TRAIN or self.mode == tf.estimator.ModeKeys.EVAL:
feed_dict = {
name_placeholder[0]: file_path[0],
name_placeholder[1]: file_path[1]
}
else:
feed_dict = {name_placeholder: file_path}
with tf.name_scope(scope_name):
hook.iterator_initializer_func = \
lambda sess: sess.run(
iterator.initializer,
feed_dict=feed_dict,
)
def _set_up_train_or_eval(self, scope_name, file_path):
hook = IteratorInitializerHook()
def input_fn():
with tf.name_scope(scope_name):
with tf.name_scope('sentence_markers'):
src_eos_id = tf.constant(self.src_eos_id, dtype=tf.int64)
tgt_eos_id = tf.constant(self.tgt_eos_id, dtype=tf.int64)
self.vocab_tables = self._create_vocab_tables(self.vocab_files, self.share_vocab)
in_file = tf.placeholder(tf.string, shape=())
in_dataset = self._prepare_data(tf.contrib.data.TextLineDataset(in_file).repeat(None))
out_file = tf.placeholder(tf.string, shape=())
out_dataset = self._prepare_data(tf.contrib.data.TextLineDataset(out_file).repeat(None))
dataset = tf.contrib.data.Dataset.zip((in_dataset, out_dataset))
dataset = self._batch_data(dataset, src_eos_id, tgt_eos_id)
iterator = dataset.make_initializable_iterator()
next_example, next_label = iterator.get_next()
self._prepare_iterator_hook(hook, scope_name, iterator, file_path, (in_file, out_file))
return next_example, next_label
return (input_fn, hook)
def _training_input_hook(self, file_path):
input_fn, hook = self._set_up_train_or_eval('train_inputs', file_path)
return (input_fn, hook)
def _validation_input_hook(self, file_path):
input_fn, hook = self._set_up_train_or_eval('eval_inputs', file_path)
return (input_fn, hook)
def _infer_input_hook(self, file_path, num_infer):
hook = IteratorInitializerHook()
def input_fn():
with tf.name_scope('infer_inputs'):
with tf.name_scope('sentence_markers'):
src_eos_id = tf.constant(self.src_eos_id, dtype=tf.int64)
self.vocab_tables = self._create_vocab_tables(self.vocab_files, self.share_vocab)
infer_file = tf.placeholder(tf.string, shape=())
dataset = tf.contrib.data.TextLineDataset(infer_file)
dataset = self._prepare_data(dataset)
dataset = self._batch_infer_data(dataset, src_eos_id)
iterator = dataset.make_initializable_iterator()
next_example, seq_len = iterator.get_next()
self._prepare_iterator_hook(hook, 'infer_inputs', iterator, file_path, infer_file)
return ((next_example, seq_len), None)
return (input_fn, hook)
그리고 내 모델입니다 : 당신이 작은 데이터를 양성 경우
class Seq2Seq():
def __init__(
self, batch_size, inputs,
outputs, inp_vocab_size, tgt_vocab_size,
embed_dim, mode, time_major=False,
enc_embedding=None, dec_embedding=None, average_across_batch=True,
average_across_timesteps=True, vocab_path=None, embedding_path='./data_files/wiki.simple.vec'
):
embed_np = self._get_embedding(embedding_path)
if not enc_embedding:
self.enc_embedding = tf.contrib.layers.embed_sequence(
inputs,
inp_vocab_size,
embed_dim,
trainable=True,
scope='embed',
initializer=tf.constant_initializer(value=embed_np, dtype=tf.float32)
)
else:
self.enc_embedding = enc_embedding
if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL:
if not dec_embedding:
embed_outputs = tf.contrib.layers.embed_sequence(
outputs,
tgt_vocab_size,
embed_dim,
trainable=True,
scope='embed',
reuse=True
)
with tf.variable_scope('embed', reuse=True):
dec_embedding = tf.get_variable('embeddings')
self.embed_outputs = embed_outputs
self.dec_embedding = dec_embedding
else:
self.dec_embedding = dec_embedding
else:
with tf.variable_scope('embed', reuse=True):
self.dec_embedding = tf.get_variable('embeddings')
if mode == tf.estimator.ModeKeys.PREDICT and vocab_path is None:
raise ValueError('If mode is predict, must supply vocab_path')
self.vocab_path = vocab_path
self.inp_vocab_size = inp_vocab_size
self.tgt_vocab_size = tgt_vocab_size
self.average_across_batch = average_across_batch
self.average_across_timesteps = average_across_timesteps
self.time_major = time_major
self.batch_size = batch_size
self.mode = mode
def _get_embedding(self, embedding_path):
model = KeyedVectors.load_word2vec_format(embedding_path)
vocab = model.vocab
vocab_len = len(vocab)
return np.array([model.word_vec(k) for k in vocab.keys()])
def _get_lstm(self, num_units):
return tf.nn.rnn_cell.BasicLSTMCell(num_units)
def encode(self, num_units, num_layers, seq_len, cell_fw=None, cell_bw=None):
if cell_fw and cell_bw:
fw_cell = cell_fw
bw_cell = cell_bw
else:
fw_cell = self._get_lstm(num_units)
bw_cell = self._get_lstm(num_units)
encoder_outputs, bi_encoder_state = tf.nn.bidirectional_dynamic_rnn(
fw_cell,
bw_cell,
self.enc_embedding,
sequence_length=seq_len,
time_major=self.time_major,
dtype=tf.float32
)
c_state = tf.concat([bi_encoder_state[0].c, bi_encoder_state[1].c], axis=1)
h_state = tf.concat([bi_encoder_state[0].h, bi_encoder_state[1].h], axis=1)
encoder_state = tf.contrib.rnn.LSTMStateTuple(c=c_state, h=h_state)
return tf.concat(encoder_outputs, -1), encoder_state
def _train_decoder(self, decoder_cell, out_seq_len, encoder_state, helper):
if not helper:
helper = tf.contrib.seq2seq.ScheduledEmbeddingTrainingHelper(
self.embed_outputs,
out_seq_len,
self.dec_embedding,
0.3,
)
# helper = tf.contrib.seq2seq.TrainingHelper(
# self.dec_embedding,
# out_seq_len,
#)
projection_layer = layers_core.Dense(self.tgt_vocab_size, use_bias=False)
decoder = tf.contrib.seq2seq.BasicDecoder(
decoder_cell,
helper,
encoder_state,
output_layer=projection_layer
)
return decoder
def _predict_decoder(self, cell, encoder_state, beam_width, length_penalty_weight):
tiled_encoder_state = tf.contrib.seq2seq.tile_batch(
encoder_state, multiplier=beam_width
)
with tf.name_scope('sentence_markers'):
sos_id = tf.constant(1, dtype=tf.int32)
eos_id = tf.constant(2, dtype=tf.int32)
start_tokens = tf.fill([self.batch_size], sos_id)
end_token = eos_id
projection_layer = layers_core.Dense(self.tgt_vocab_size, use_bias=False)
emb = tf.squeeze(self.dec_embedding)
decoder = tf.contrib.seq2seq.BeamSearchDecoder(
cell=cell,
embedding=self.dec_embedding,
start_tokens=start_tokens,
end_token=end_token,
initial_state=tiled_encoder_state,
beam_width=beam_width,
output_layer=projection_layer,
length_penalty_weight=length_penalty_weight
)
return decoder
def decode(
self, num_units, out_seq_len,
encoder_state, cell=None, helper=None,
beam_width=None, length_penalty_weight=None
):
with tf.name_scope('Decode'):
if cell:
decoder_cell = cell
else:
decoder_cell = tf.nn.rnn_cell.BasicLSTMCell(2*num_units)
if self.mode != estimator.ModeKeys.PREDICT:
decoder = self._train_decoder(decoder_cell, out_seq_len, encoder_state, helper)
else:
decoder = self._predict_decoder(decoder_cell, encoder_state, beam_width, length_penalty_weight)
outputs = tf.contrib.seq2seq.dynamic_decode(
decoder,
maximum_iterations=20,
swap_memory=True,
)
outputs = outputs[0]
if self.mode != estimator.ModeKeys.PREDICT:
return outputs.rnn_output, outputs.sample_id
else:
return outputs.beam_search_decoder_output, outputs.predicted_ids
def prepare_predict(self, sample_id):
rev_table = lookup_ops.index_to_string_table_from_file(
self.vocab_path, default_value=UNK)
predictions = rev_table.lookup(tf.to_int64(sample_id))
return tf.estimator.EstimatorSpec(
predictions=predictions,
mode=tf.estimator.ModeKeys.PREDICT
)
def prepare_train_eval(
self, t_out,
out_seq_len, labels, lr,
train_op=None, loss=None
):
if not loss:
weights = tf.sequence_mask(
out_seq_len,
dtype=t_out.dtype
)
loss = tf.contrib.seq2seq.sequence_loss(
t_out,
labels,
weights,
average_across_batch=self.average_across_batch,
)
if not train_op:
train_op = tf.contrib.layers.optimize_loss(
loss,
tf.train.get_global_step(),
optimizer='SGD',
learning_rate=lr,
summaries=['loss', 'learning_rate']
)
return tf.estimator.EstimatorSpec(
mode=self.mode,
loss=loss,
train_op=train_op,
)
이것은 전체 네트워크에 긍정적 인 영향을 미쳤습니다. – iantbutler