{"id":427,"date":"2019-07-31T16:08:38","date_gmt":"2019-07-31T07:08:38","guid":{"rendered":"https:\/\/aiandstory.net\/?p=427"},"modified":"2019-07-31T16:08:40","modified_gmt":"2019-07-31T07:08:40","slug":"post-427","status":"publish","type":"post","link":"https:\/\/aiandstory.net\/?p=427","title":{"rendered":"A-4.End-To-End Memory Networks\u306e\u5229\u7528"},"content":{"rendered":"\n<p>\u4efb\u610f\u306e\u9577\u3055\u30c6\u30ad\u30b9\u30c8\u3092End-To-End Memory Networks\u3068RNN Encoder-Decoder with Attention\u3092\u5229\u7528\u3057\u305f\u30e2\u30c7\u30eb\u306b\u5b66\u7fd2\u3055\u305b\u308b\u305f\u3081\u306eKeras\u306e\u30b5\u30f3\u30d7\u30eb\u30b3\u30fc\u30c9\u3002\u300c\u30e2\u30c7\u30eb\u306e\u5b9a\u7fa9\u3068\u8a13\u7df4\u300d\u3068\u300c\u4fdd\u5b58\u3057\u305f\u30e2\u30c7\u30eb\u306b\u3088\u308b\u6587\u7ae0\u751f\u6210\u300d\u306f\u5225\u3005\u306b\u5b9f\u884c\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<p>\n\n \u30b5\u30f3\u30d7\u30eb\u30c7\u30fc\u30bf(\u8981\u89e3\u51cd)<br><a href=\"https:\/\/aiandstory.net\/sample.zip\">https:\/\/aiandstory.net\/sample.zip<\/a><\/p>\n\n\n\n<h4 class=\"wp-block-heading\"> \u30e2\u30c7\u30eb\u306e\u5b9a\u7fa9\u3068\u8a13\u7df4 <\/h4>\n\n\n\n<pre class=\"wp-block-code\"><code># -*- coding: utf-8 -*-\n\n\nfrom __future__ import print_function\nfrom tensorflow.keras.models import Sequential, Model\nfrom tensorflow.keras.layers import Dense, LSTM, Embedding\nfrom tensorflow.keras.layers import Reshape, RepeatVector, TimeDistributed, Activation\nfrom tensorflow.keras.layers import add, concatenate, Flatten, dot, Lambda, Permute\nfrom tensorflow.keras.optimizers import Adamax\nfrom tensorflow.keras import Input\n\nimport numpy as np\nimport random\nimport sys\nimport warnings\nwarnings.filterwarnings('ignore')\n\n#utf-8\u306e\u30c6\u30ad\u30b9\u30c8\u3092\u6e96\u5099\npath=\"sample.txt\"\n\n#\u4e00\u62ec\u3067\u8aad\u307f\u8fbc\u307f\nwith open(path, encoding='utf-8') as f:\n    text = f.read()\nprint('corpus length:', 
len(text))\n\n#\u7a7a\u767d\u3092\u9664\u53bb\ntokens = text.split()\ntext = ''.join(tokens)\n\n#\u6587\u5b57\u306e\u30a4\u30f3\u30c7\u30c3\u30af\u30b9\u3092\u4f5c\u6210\u3059\u308b\u305f\u3081\u306b\u30bd\u30fc\u30c8\nchars = sorted(list(set(text)))\nprint('total chars:', len(chars))\n\n#\u6587\u5b57\uff1c\uff0d\uff1e\u30a4\u30f3\u30c7\u30c3\u30af\u30b9\u306e\u8f9e\u66f8\u3092\u6e96\u5099\nchar_indices = dict((c, i) for i, c in enumerate(chars))\nindices_char = dict((i, c) for i, c in enumerate(chars))\n\n#\u5b66\u7fd2\u53ca\u3073\u6b63\u89e3\u30c7\u30fc\u30bf\u306e\u6e96\u5099\nembdimsize=512#Embedding\u5c64\u306e\u51fa\u529b\u6b21\u5143\nmaxlen = 30#\u5165\u529b\u30c7\u30fc\u30bf\u306e\u6587\u5b57\u6570\ngen_charlen = 1#\u51fa\u529b(\u56de\u7b54)\u30c7\u30fc\u30bf\u306e\u6587\u5b57\u6570\nstep = 1\n\n# build the model: \nprint('Build model...')\n\n#\u8a9e\u5f59\u6570\u304cEmbedding\u5c64\u306e\u5165\u529b\u6b21\u5143\u306b\u306a\u308b\nvcab_size = len(chars)\n\n#\u4ed6\u306e\u30d5\u30a1\u30a4\u30eb\u3067import\u3059\u308b\u5834\u5408\u3001\u4ee5\u964d\u306f\u5b9f\u884c\u3057\u306a\u3044\u3002\nif __name__ == \"__main__\":#import\u306b\u5fc5\u8981\n\n    #End-To-End Memory Networks\u7528\u306e\u51e6\u7406\u306e\u958b\u59cb[5]\n    #\u6587\u5b57\u306e\u30a4\u30f3\u30c7\u30c3\u30af\u30b9\u304c\u5165\u529b\u30c7\u30fc\u30bf\u306a\u306e\u3067int32\u3092\u6307\u5b9a,shape\u306b\u306f\u30b5\u30f3\u30d7\u30eb\u6570\u306e\u6b21\u5143\u306e\u8ef8\u306f\u542b\u307e\u306a\u3044\n    #\u3053\u306e\u6307\u5b9a\u3067\u306fmaxlen\u6b21\u5143\u306e\u30d9\u30af\u30c8\u30eb\u304c\u5165\u529b\u3068\u306a\u308b\n    text_input_s = Input(shape=(maxlen,), dtype='int32', name='text_s')\n    text_input_q = Input(shape=(maxlen,), dtype='int32', name='text_q')\n    #\u57cb\u3081\u8fbc\u307f\u5c64\u3002output_dim\u6b21\u5143\u306e\u30d9\u30af\u30c8\u30eb\u30b7\u30fc\u30b1\u30f3\u30b9\u306b\u57cb\u3081\u8fbc\u3080\n    input_encoded_m = Embedding(input_dim=vcab_size 
,output_dim=embdimsize)(text_input_s)\n    input_encoded_c = Embedding(input_dim=vcab_size, output_dim=maxlen)(text_input_s)\n    question_encoded = Embedding(input_dim=vcab_size, output_dim=embdimsize)(text_input_q)\n\n    #\u5165\u529b\u30c7\u30fc\u30bf\uff08\u554f\u984c\uff09\u30d9\u30af\u30c8\u30eb\u30b7\u30fc\u30b1\u30f3\u30b9\u3068\u5165\u529b\u30c7\u30fc\u30bf\uff08\u8cea\u554f\uff09\u306e\u30d9\u30af\u30c8\u30eb\u30b7\u30fc\u30b1\u30f3\u30b9\u306e\u9593\u306e '\u4e00\u81f4'\u3092\u8a08\u7b97\n    match = dot([input_encoded_m, question_encoded], axes=(2, 2))\n    match = Activation('softmax')(match)\n\n    #2\u756a\u76ee\u306e\u5165\u529b\u30c7\u30fc\u30bf\uff08\u554f\u984c\uff09\u306e\u30d9\u30af\u30c8\u30eb\u30b7\u30fc\u30b1\u30f3\u30b9\u306b\u4e0a\u8a18\u306e\u7d50\u679c\u3092\u8ffd\u52a0\u3059\u308b\n    response = add([match, input_encoded_c])\n    response = Permute((2, 1))(response)\n\n    # \u4e0a\u8a18\u306e\u7d50\u679c\u3068\u5165\u529b\u30c7\u30fc\u30bf\uff08\u8cea\u554f\uff09\u306e\u30d9\u30af\u30c8\u30eb\u30b7\u30fc\u30b1\u30f3\u30b9\u3092\u9023\u7d50\u3059\u308b\n    answer = concatenate([response, question_encoded])\n\n    #End-To-End Memory Networks\u7528\u306e\u51e6\u7406\u306e\u7d42\u308f\u308a[5]\n\n    hidden_unit = 128#LSTM\u306e\u96a0\u308c\u30e6\u30cb\u30c3\u30c8\u6570\n    output = answer\n    \n    #-----\u30a8\u30f3\u30b3\u30fc\u30c0\u30fc\uff1a\u5165\u529b\u30c7\u30fc\u30bf\u3092\u56fa\u5b9a\u9577\u306e\u72b6\u614b\u30d9\u30af\u30c8\u30eb\u306b\u30a8\u30f3\u30b3\u30fc\u30c9\u3059\u308b-----\n    #\u30a2\u30c6\u30f3\u30b7\u30e7\u30f3\u306e\u51e6\u7406\u7528\u306breturn_sequences=True\u304c\u5fc5\u8981\u3002\u518d\u5e30\u6642\u306e\u9014\u4e2d\uff08\u7cfb\u5217\uff09\u306e\u30c7\u30fc\u30bf\u3082\u51fa\u529b\u3059\u308b\n    output_enc = LSTM(hidden_unit, return_sequences=True, dropout=0.2, recurrent_dropout=0.5)(output)\n    
#\u30c7\u30b3\u30fc\u30c0\u30fc\u306e\uff2c\uff33\uff34\uff2d\u5c64\u306e\u5165\u529b\u7528\u306b\u3001\u6700\u5f8c\u306e\u72b6\u614b\u30d9\u30af\u30c8\u30eb\u3060\u3051\u53d6\u308a\u51fa\u3059\u3002return_sequences=False\u306e\u4ee3\u308f\u308a\n    output_enc_last = Lambda(lambda x: x[:,-1,:], output_shape=(None, hidden_unit))(output_enc)\n    #--------------------\n\n    #-----\u30c7\u30b3\u30fc\u30c0\u30fc\uff1a\u72b6\u614b\u30d9\u30af\u30c8\u30eb\u3092\u51fa\u529b\u30c7\u30fc\u30bf\u306b\u30c7\u30b3\u30fc\u30c9\u3059\u308b-----\n    #\u51fa\u529b\u30c7\u30fc\u30bf\u306e\u6587\u5b57\u6570\u5206\u3060\u3051\u51e6\u7406\u3092\u7e70\u308a\u8fd4\u3059\n    output_dec = RepeatVector(gen_charlen)(output_enc_last)\n    #\u30c7\u30b3\u30fc\u30c0\u306e\uff2c\uff33\uff34\uff2d\u5c64\n    output_dec = LSTM(hidden_unit, return_sequences=True, dropout=0.2, recurrent_dropout=0.5)(output_dec)\n    \n    #-----Attention\u7528\u306e\u51e6\u7406-----\n    #[3]\u306b\u304a\u3051\u308b(7)\u306e\u5b9f\u88c5\n    attention = dot([output_dec, output_enc], axes=[2, 2])\n    attention = Activation('softmax')(attention)    \n    context = dot([attention, output_enc], axes=[2,1])\n    decoder_combined_context = concatenate([context, output_dec])\n\n    output_dec = decoder_combined_context\n\n    #TimeDistributed\u306fRepeatVector\u306e\u7e70\u308a\u8fd4\u3057\u5206\u306b\u30ec\u30a4\u30e4\u30fc\uff08\u3053\u3053\u3067\u306fDense\uff09\u3092\u9069\u7528\n    #[3]\u306b\u304a\u3051\u308b(5)\u306e\u5b9f\u88c5\n    output_dec = TimeDistributed(Dense(512, activation=\"tanh\"))(output_dec) \n    #----Attention\u7d42\u4e86--------\n    \n    #softmax\u3067\u5404\u6587\u5b57\u6bce(vcab_size=\u8a9e\u5f59\u6570)\u306e\u78ba\u7387\u3092\u51fa\u529b\n    output_dec = TimeDistributed(Dense(vcab_size, activation='softmax'))(output_dec)\n    #----\u30c7\u30b3\u30fc\u30c0\u30fc\u7d42\u4e86---------\n\n    #\u30e2\u30c7\u30eb\u306e\u30a4\u30f3\u30b9\u30bf\u30f3\u30b9\u5316\n    model = 
Model([text_input_s, text_input_q], output_dec)\n    \n    optimizer = Adamax(lr=0.01)\n    #softmax\u3067\u6570\u5024\uff08one-hot\u8868\u73fe\u3067\u306f\u306a\u304f\u6570\u5024\u306e\u914d\u5217\uff09\u306e\u30bf\u30fc\u30b2\u30c3\u30c8\u3092\u51e6\u7406\u3059\u308b\u5834\u5408\u306fsparse_categorical_crossentropy\u3092\u6307\u5b9a\u3059\u308b\u3002\n    #\u3053\u306e\u6307\u5b9a\u306f\u3001\u5165\u529b\u30c7\u30fc\u30bf\u3054\u3068\u306b\u78ba\u7387\u30b9\u30b3\u30a2\u3092\u51fa\u529b\u3059\u308b\u305f\u3081\u3001\u30e2\u30c7\u30eb\u306e\u6700\u7d42\u7684\u306a\u51fa\u529b\u5f62\u72b6\u306b\u306f\u5f71\u97ff\u3057\u306a\u3044\u3002\n    model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer,  metrics=['accuracy'])\n    model.summary()\n    \n \n    #\u5b66\u7fd2\u7528\u30c7\u30fc\u30bf\u306e\u6e96\u5099   \n    sentences_s = []#\u5165\u529b\u30c7\u30fc\u30bf\u306e\u30ea\u30b9\u30c8(\u554f\u984c\u6587)\n    sentences_q = []#\u5165\u529b\u30c7\u30fc\u30bf\u306e\u30ea\u30b9\u30c8\uff08\u8cea\u554f\uff09\n    next_chars = []#\u51fa\u529b(\u56de\u7b54)\u30c7\u30fc\u30bf\u306e\u30ea\u30b9\u30c8\n\n    for i in range(0, len(text) - maxlen - gen_charlen, step):\n        sentences_s.append([char_indices[char] for char in text[i: i + maxlen]])\n        sentences_q.append([char_indices[char] for char in text[i: i + maxlen]])#\u3053\u306e\u4f8b\u3067\u306fsentences_s\u3068\u540c\u4e00\u3060\u304c\u3001\u672c\u6765\u306f\u7570\u306a\u308b\n        next_chars.append([char_indices[char] for char in text[i + maxlen: i + maxlen + gen_charlen]])\n    print('input data sequences:', len(sentences_s))\n        \n    #model.fit()\u7528\u306bNumPy\u914d\u5217\u3092\u751f\u6210\u3059\u308b\u3002\n    x_s = np.array(sentences_s)\n    x_q = np.array(sentences_q)\n    y = np.array(next_chars)\n    #sparse_categorical_crossentropy\u306e\u51fa\u529b\u3068\u6574\u5408\u3059\u308b\u7528\u306bshape\u3092\u5909\u66f4\u3059\u308b\n    y = np.reshape(y, (-1, gen_charlen, 
1))\n    \n    #x\u306f\u5165\u529b\u30c7\u30fc\u30bf\u3001y\u306f\u51fa\u529b(\u6b63\u89e3)\u30c7\u30fc\u30bf\u3001epochs(\u8a66\u884c\u56de\u6570)\u306f\u4efb\u610f(10)\n    model.fit([x_s, x_q], y,\n              batch_size=128,\n              epochs=50)\n    #\u5b66\u7fd2\u6e08\u307f\u30e2\u30c7\u30eb\u3092\u4fdd\u5b58\n    model.save('text_generation_for_aiandstory')<\/code><\/pre>\n\n\n\n<h4 class=\"wp-block-heading\"> \u4fdd\u5b58\u3057\u305f\u30e2\u30c7\u30eb\u306b\u3088\u308b\u6587\u7ae0\u751f\u6210 <\/h4>\n\n\n\n<pre class=\"wp-block-code\"><code># -*- coding: utf-8 -*-\n\nimport numpy as np\nimport sys\n \nfrom tensorflow.keras.models import load_model\nfrom text_generation_for_aiandstory import maxlen, text, char_indices, indices_char\n\n#\u4fdd\u5b58\u3057\u305f\u30e2\u30c7\u30eb\u3092\u30ed\u30fc\u30c9\u3059\u308b\nmodel=load_model('text_generation_for_aiandstory')\n \ngenerated = ''\n\nsys.stdout.write(generated)\n\n#sample.txt\u306e\u4f55\u6587\u5b57\u76ee\u304b\u3089\u6700\u521d\u306b\u4e0e\u3048\u308b\u6587\u3092\u53d6\u308a\u51fa\u3059\u304b\u3092\u6c7a\u3081\u308b\ns_ind = 102\nsentence = text[s_ind: s_ind + maxlen]\n#\u5225\u9014\u4e0e\u3048\u308b\u5834\u5408\u306fmaxlen\u6587\u5b57\u3067\u3001\u3059\u3079\u3066\u306e\u6587\u5b57\u304c\u8a9e\u5f59\u306b\u5b58\u5728\u3057\u3066\u3044\u306a\u3051\u308c\u3070\u306a\u3089\u306a\u3044\u3002\n#sentence = \"\"\nprint(sentence)\n\n#500\u6587\u5b57\u751f\u6210\u3059\u308b\nfor i in range(500):\n    x4preds = []\n    x4pred = [char_indices[char] for char in sentence]\n    x4preds.append(x4pred)\n    x4preds = np.array(x4preds)\n    \n    #\u7b54\u3048\u304c\u8907\u6570\u6587\u5b57\u306e\u5834\u5408\u306b\u3082\u5bfe\u5fdc\u3055\u305b\u308b\n    preds = model.predict([x4preds, x4preds], verbose=0)[0]#x4preds\u306f\u554f\u984c\u6587=\u8cea\u554f\u6587\n    ans_indexies = [np.argmax(pred) for pred in preds]\n    next_chars = [indices_char[ans_index] for ans_index in ans_indexies]\n    next_chars = 
''.join(next_chars)\n    sentence = sentence[len(next_chars):] + next_chars\n\n    sys.stdout.write(next_chars)\n\n    sys.stdout.flush()<\/code><\/pre>\n\n\n\n<h4 class=\"wp-block-heading\"><strong>\u53c2\u8003\u6587\u732e<\/strong><\/h4>\n\n\n\n<h5 class=\"wp-block-heading\"> \u5168\u822c <\/h5>\n\n\n\n<p>[1] &nbsp; <a href=\"https:\/\/github.com\/keras-team\/keras\/tree\/master\/examples\">https:\/\/github.com\/keras-team\/keras\/tree\/master\/examples<\/a><\/p>\n\n\n\n<h5 class=\"wp-block-heading\"> Attention <\/h5>\n\n\n\n<p>[2] &nbsp; wanasit : Attention-based Sequence-to-Sequence in Keras&nbsp; <a href=\"https:\/\/wanasit.github.io\/attention-based-sequence-to-sequence-in-keras.html\">https:\/\/wanasit.github.io\/attention-based-sequence-to-sequence-in-keras.html<\/a> (2017)<\/p>\n\n\n\n<p>[3] &nbsp; Minh-Thang Luong, Hieu Pham, Christopher D. Manning : \u201dEffective Approaches to Attention-based Neural Machine Translation\u201d\u3000arXiv preprint arXiv:1508.04025 (2015)<\/p>\n\n\n\n<h5 class=\"wp-block-heading\"> Memory Networks <\/h5>\n\n\n\n<p>[4]&nbsp; &nbsp; Sainbayar Sukhbaatar, Arthur Szlam, Jason Weston, Rob Fergus: &nbsp;&nbsp;&#8220;End-To-End Memory Networks&#8221;   arXiv preprint arXiv:1503.08895 (2015) <\/p>\n\n\n\n<p>[5] &nbsp; keras-team <br><a href=\"https:\/\/github.com\/keras-team\/keras\/blob\/master\/examples\/babi_memnn.py\">https:\/\/github.com\/keras-team\/keras\/blob\/master\/examples\/babi_memnn.py<\/a><\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u4efb\u610f\u306e\u9577\u3055\u30c6\u30ad\u30b9\u30c8\u3092End-To-End Memory Networks\u3068RNN Encoder-Decoder with Attention\u3092\u5229\u7528\u3057\u305f\u30e2\u30c7\u30eb\u306b\u5b66\u7fd2\u3055\u305b\u308b\u305f\u3081\u306eKeras\u306e\u30b5\u30f3\u30d7\u30eb\u30b3\u30fc\u30c9\u3002\u300c\u30e2\u30c7\u30eb\u306e\u5b9a\u7fa9\u3068 
[&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"closed","ping_status":"open","sticky":false,"template":"","format":"standard","meta":[],"categories":[19,5],"tags":[11,13,20],"_links":{"self":[{"href":"https:\/\/aiandstory.net\/index.php?rest_route=\/wp\/v2\/posts\/427"}],"collection":[{"href":"https:\/\/aiandstory.net\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/aiandstory.net\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/aiandstory.net\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/aiandstory.net\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=427"}],"version-history":[{"count":5,"href":"https:\/\/aiandstory.net\/index.php?rest_route=\/wp\/v2\/posts\/427\/revisions"}],"predecessor-version":[{"id":432,"href":"https:\/\/aiandstory.net\/index.php?rest_route=\/wp\/v2\/posts\/427\/revisions\/432"}],"wp:attachment":[{"href":"https:\/\/aiandstory.net\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=427"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/aiandstory.net\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=427"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/aiandstory.net\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=427"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}