Keras教程
作者: 时海
IMDB影评情感分析-CNN

一维卷积神经网络--IMDB影评情感分析

# -*- coding: UTF-8 -*-
from itertools import chain

from keras.datasets import imdb
from keras.layers import Dense, Conv1D, MaxPool1D, Embedding, Flatten
from keras.models import Sequential
from keras.preprocessing import sequence

# Load the IMDB review dataset from a local copy of imdb.npz instead of
# downloading it. Each x entry is a list of word indices; each y entry is
# a 0/1 sentiment label.
local_path = 'D:/data/keras/imdb.npz'
(x_train, y_train), (x_test, y_test) = imdb.load_data(path=local_path)
print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)
# (25000,) (25000,) (25000,) (25000,)
print(x_train[:3])
print(y_train[:3])
print(x_test[:3])
print(y_test[:3])

# Vocabulary size = the highest word index seen anywhere in train or test,
# plus one (indices are 0-based, so the Embedding layer needs
# input_dim = max_index + 1). A single max over the chained sequences
# replaces the two hand-rolled scan loops.
vocab_count = max(max(seq) for seq in chain(x_train, x_test)) + 1
print("total vocab count:", vocab_count)
# 88587

# Truncate/pad every review to exactly 100 word indices (Keras pads with 0
# at the front by default), giving the fixed-length input the model expects.
x_train = sequence.pad_sequences(x_train, maxlen=100)
x_test = sequence.pad_sequences(x_test, maxlen=100)

# 1-D CNN sentiment classifier: word embeddings -> Conv1D -> max-pooling
# -> flatten -> single sigmoid unit for the binary positive/negative label.
model = Sequential([
    Embedding(output_dim=300,
              input_dim=vocab_count,
              input_length=100),
    Conv1D(filters=64,
           kernel_size=3,
           padding='same',
           activation='relu'),
    MaxPool1D(pool_size=2),
    Flatten(),
    Dense(1, activation='sigmoid'),
])
# Binary cross-entropy + RMSprop is the standard pairing for a single
# sigmoid output; track accuracy during training.
model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])
print(model.summary())

# Model: "sequential_1"
# _________________________________________________________________
# Layer (type)                 Output Shape              Param #
# =================================================================
# embedding_1 (Embedding)      (None, 100, 300)          26575500
# _________________________________________________________________
# conv1d_1 (Conv1D)            (None, 100, 64)           57664
# _________________________________________________________________
# max_pooling1d_1 (MaxPooling1 (None, 50, 64)            0
# _________________________________________________________________
# flatten_1 (Flatten)          (None, 3200)              0
# _________________________________________________________________
# dense_1 (Dense)              (None, 1)                 3201
# =================================================================
# Total params: 26,636,365
# Trainable params: 26,636,365
# Non-trainable params: 0
# _________________________________________________________________
# None

# Train for 10 epochs, holding out the last 20% of the training data for
# validation; verbose=2 prints one line per epoch.
history = model.fit(x_train, y_train,
                    batch_size=1000,
                    epochs=10,
                    verbose=2,
                    validation_split=0.2)

# Evaluate on the untouched test set; scores is [loss, accuracy].
scores = model.evaluate(x_test, y_test)
print(scores)
# [0.48264816513061526, 0.8366000056266785]


一个创业中的苦逼程序员
  • 回复
隐藏