In [ ]:
!curl -s https://raw.githubusercontent.com/teddylee777/machine-learning/master/99-Misc/01-Colab/mecab-colab.sh | bash
In [45]:
# AI lib - sklearn
# xxxxClassfier(분류) - 평가지표(정확도, 정밀도, 재현율, 조화평균)
# xxxxRegression (예측/회귀)
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import sklearn
from sklearn.datasets import load_iris, load_breast_cancer, fetch_openml, load_digits
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, mean_squared_error, mean_absolute_error , silhouette_samples , silhouette_score
from sklearn.neighbors import KNeighborsClassifier , KNeighborsRegressor
from sklearn.linear_model import LogisticRegression , LinearRegression, Ridge, Lasso
#비지도학습
from sklearn.cluster import KMeans , DBSCAN
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler, MinMaxScaler, Binarizer, PolynomialFeatures
print('sklearn version', sklearn.__version__)
#tenserflow lib
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Activation, Flatten, Conv2D, MaxPooling2D, AveragePooling2D, ZeroPadding2D, Embedding, SimpleRNN
from tensorflow.keras.datasets import boston_housing, mnist, fashion_mnist, cifar10 , imdb
from tensorflow.keras import optimizers
from tensorflow.keras.utils import to_categorical
#이미지로드
from tensorflow.keras.preprocessing import image
#코랩에 파일,이미지 업로드
from google.colab import files
#학습 조기 종료
from tensorflow.keras.callbacks import EarlyStopping , ModelCheckpoint, Callback
# 자연어 처리
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
print('tensorflow version', tf.__version__)
#import konlpy
#print(konlpy.__version__)
sklearn version 1.2.2 tensorflow version 2.14.0
학습목표
- 자연어 처리
- NLP (nlpt)
- 단어 -> 숫자(인코딩)
- 문장안에 단어가 들어있고 -> 토큰화 과정이 필요하다
- 문장의 길이를 맞추는 패딩
- Embeding , RNN , LSTM , GRU, BERT
In [66]:
print('문자 기반 인코딩이란 ? ')
print('LISTEN')
print("SILENT")
print('단어 기반 인코딩 - ')
sentences = [
'I love my student',
'I love my pupil',
'JinHo love your Teacher?',
'Do your think your Teacher is amazing?'
]
tokenizer = Tokenizer(num_words = 100) #num_words => 단어의 갯수 제한
tokenizer.fit_on_texts(sentences)
word_index = tokenizer.word_index
print(word_index)
print()
print()
print()
train = tokenizer.texts_to_sequences(sentences)
print(train)
문자 기반 인코딩이란 ? LISTEN SILENT 단어 기반 인코딩 - {'love': 1, 'your': 2, 'i': 3, 'my': 4, 'teacher': 5, 'student': 6, 'pupil': 7, 'jinho': 8, 'do': 9, 'think': 10, 'is': 11, 'amazing': 12} [[3, 1, 4, 6], [3, 1, 4, 7], [8, 1, 2, 5], [9, 2, 10, 2, 5, 11, 12]]
In [67]:
test_sentences = [
'I really love my student',
'Jinho loves my Teacher'
]
test = tokenizer.texts_to_sequences(test_sentences)
print(test)
[[3, 1, 4, 6], [8, 4, 5]]
In [68]:
print('토큰화되지 않은 단어처리가 필요하다')
print('oov_token 파라미터를 이용해서 특수값으로 처리한다')
sentences = [
'I love my student',
'I love my pupil',
'JinHo love your Teacher?',
'Do your think your Teacher is amazing?'
]
tokenizer = Tokenizer(num_words = 100, oov_token='<00V>') #num_words => 단어의 갯수 제한
tokenizer.fit_on_texts(sentences)
word_index = tokenizer.word_index
print(word_index)
print()
print()
print()
train = tokenizer.texts_to_sequences(sentences)
print(train)
print()
train_padding = pad_sequences(train, padding = 'pre', maxlen= 6, truncating='post')
print(train_padding)
토큰화되지 않은 단어처리가 필요하다 oov_token 파라미터를 이용해서 특수값으로 처리한다 {'<00V>': 1, 'love': 2, 'your': 3, 'i': 4, 'my': 5, 'teacher': 6, 'student': 7, 'pupil': 8, 'jinho': 9, 'do': 10, 'think': 11, 'is': 12, 'amazing': 13} [[4, 2, 5, 7], [4, 2, 5, 8], [9, 2, 3, 6], [10, 3, 11, 3, 6, 12, 13]] [[ 0 0 4 2 5 7] [ 0 0 4 2 5 8] [ 0 0 9 2 3 6] [10 3 11 3 6 12]]
In [69]:
test_sentences = [
'I really love my student',
'Jinho loves my Teacher'
]
test = tokenizer.texts_to_sequences(test_sentences)
print(test)
[[4, 1, 2, 5, 7], [9, 1, 5, 6]]
In [70]:
print('imdb 데이터 세트를 이용한 실습')
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words = 1000)
(X_train.shape, y_train.shape), (X_test.shape, y_test.shape)
imdb 데이터 세트를 이용한 실습
Out[70]:
(((25000,), (25000,)), ((25000,), (25000,)))
In [71]:
len(X_train[0])
Out[71]:
218
In [72]:
y_train[0]
Out[72]:
1
In [73]:
pd.Series(y_train).value_counts()
Out[73]:
1 12500 0 12500 dtype: int64
In [74]:
train_padding = pad_sequences(X_train, padding = 'pre', maxlen= 500)
#print(train_padding)
test_padding = pad_sequences(X_test, padding = 'pre', maxlen= 500)
#print(test_padding)
len(train_padding[0])
Out[74]:
500
In [75]:
nlp_model = Sequential()
#입력층
#input_dim 학습데이터에서 사용하게 되는 인자의 개수
#output_dim 임베딩 벡터의 크기
nlp_model.add(Embedding(input_dim = 1000 , output_dim = 32, input_length = 500))
nlp_model.add(Flatten()) #1차원으로
#출력층
nlp_model.add(Dense(units = 1 ))
nlp_model.add(Activation('sigmoid'))
nlp_model.compile(loss = 'binary_crossentropy',
metrics = ['accuracy'],
optimizer = 'adam')
In [76]:
nlp_model.summary()
Model: "sequential_3" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= embedding_3 (Embedding) (None, 500, 32) 32000 flatten_3 (Flatten) (None, 16000) 0 dense_3 (Dense) (None, 1) 16001 activation_3 (Activation) (None, 1) 0 ================================================================= Total params: 48001 (187.50 KB) Trainable params: 48001 (187.50 KB) Non-trainable params: 0 (0.00 Byte) _________________________________________________________________
In [77]:
nlp_model_history = nlp_model.fit(train_padding, y_train,
batch_size = 32,
epochs = 30 ,
validation_split = 0.2,
verbose = 1)
Epoch 1/30 625/625 [==============================] - 8s 10ms/step - loss: 0.5017 - accuracy: 0.7426 - val_loss: 0.3556 - val_accuracy: 0.8466 Epoch 2/30 625/625 [==============================] - 5s 7ms/step - loss: 0.3024 - accuracy: 0.8740 - val_loss: 0.3324 - val_accuracy: 0.8604 Epoch 3/30 625/625 [==============================] - 5s 8ms/step - loss: 0.2434 - accuracy: 0.9049 - val_loss: 0.3483 - val_accuracy: 0.8520 Epoch 4/30 625/625 [==============================] - 4s 7ms/step - loss: 0.1887 - accuracy: 0.9306 - val_loss: 0.3576 - val_accuracy: 0.8530 Epoch 5/30 625/625 [==============================] - 4s 7ms/step - loss: 0.1409 - accuracy: 0.9584 - val_loss: 0.3816 - val_accuracy: 0.8476 Epoch 6/30 625/625 [==============================] - 5s 8ms/step - loss: 0.1028 - accuracy: 0.9746 - val_loss: 0.4075 - val_accuracy: 0.8446 Epoch 7/30 625/625 [==============================] - 4s 7ms/step - loss: 0.0734 - accuracy: 0.9865 - val_loss: 0.4335 - val_accuracy: 0.8424 Epoch 8/30 625/625 [==============================] - 10s 16ms/step - loss: 0.0513 - accuracy: 0.9929 - val_loss: 0.4743 - val_accuracy: 0.8400 Epoch 9/30 625/625 [==============================] - 7s 11ms/step - loss: 0.0353 - accuracy: 0.9969 - val_loss: 0.4980 - val_accuracy: 0.8382 Epoch 10/30 625/625 [==============================] - 6s 10ms/step - loss: 0.0241 - accuracy: 0.9988 - val_loss: 0.5335 - val_accuracy: 0.8350 Epoch 11/30 625/625 [==============================] - 8s 13ms/step - loss: 0.0164 - accuracy: 0.9995 - val_loss: 0.5705 - val_accuracy: 0.8356 Epoch 12/30 625/625 [==============================] - 9s 14ms/step - loss: 0.0113 - accuracy: 0.9998 - val_loss: 0.6022 - val_accuracy: 0.8364 Epoch 13/30 625/625 [==============================] - 4s 7ms/step - loss: 0.0078 - accuracy: 0.9999 - val_loss: 0.6383 - val_accuracy: 0.8356 Epoch 14/30 625/625 [==============================] - 7s 12ms/step - loss: 0.0053 - accuracy: 0.9999 - val_loss: 0.6736 - val_accuracy: 0.8358 Epoch 15/30 625/625 [==============================] - 8s 12ms/step - loss: 0.0037 - accuracy: 0.9999 - val_loss: 0.7149 - val_accuracy: 0.8368 Epoch 16/30 625/625 [==============================] - 6s 9ms/step - loss: 0.0026 - accuracy: 1.0000 - val_loss: 0.7507 - val_accuracy: 0.8320 Epoch 17/30 625/625 [==============================] - 7s 12ms/step - loss: 0.0019 - accuracy: 1.0000 - val_loss: 0.7799 - val_accuracy: 0.8352 Epoch 18/30 625/625 [==============================] - 7s 12ms/step - loss: 0.0013 - accuracy: 1.0000 - val_loss: 0.8133 - val_accuracy: 0.8352 Epoch 19/30 625/625 [==============================] - 4s 6ms/step - loss: 8.7890e-04 - accuracy: 1.0000 - val_loss: 0.8460 - val_accuracy: 0.8348 Epoch 20/30 625/625 [==============================] - 5s 8ms/step - loss: 6.5062e-04 - accuracy: 1.0000 - val_loss: 0.8825 - val_accuracy: 0.8334 Epoch 21/30 625/625 [==============================] - 4s 7ms/step - loss: 4.5460e-04 - accuracy: 1.0000 - val_loss: 0.9219 - val_accuracy: 0.8320 Epoch 22/30 625/625 [==============================] - 4s 7ms/step - loss: 3.2471e-04 - accuracy: 1.0000 - val_loss: 0.9502 - val_accuracy: 0.8350 Epoch 23/30 625/625 [==============================] - 5s 8ms/step - loss: 2.2310e-04 - accuracy: 1.0000 - val_loss: 0.9863 - val_accuracy: 0.8328 Epoch 24/30 625/625 [==============================] - 4s 7ms/step - loss: 1.6240e-04 - accuracy: 1.0000 - val_loss: 1.0201 - val_accuracy: 0.8340 Epoch 25/30 625/625 [==============================] - 5s 8ms/step - loss: 1.1446e-04 - accuracy: 1.0000 - val_loss: 1.0534 - val_accuracy: 0.8328 Epoch 26/30 625/625 [==============================] - 6s 9ms/step - loss: 8.3634e-05 - accuracy: 1.0000 - val_loss: 1.0919 - val_accuracy: 0.8306 Epoch 27/30 625/625 [==============================] - 4s 7ms/step - loss: 5.9314e-05 - accuracy: 1.0000 - val_loss: 1.1219 - val_accuracy: 0.8342 Epoch 28/30 625/625 [==============================] - 5s 9ms/step - loss: 4.2478e-05 - accuracy: 1.0000 - val_loss: 1.1600 - val_accuracy: 0.8330 Epoch 29/30 625/625 [==============================] - 4s 7ms/step - loss: 3.0431e-05 - accuracy: 1.0000 - val_loss: 1.1922 - val_accuracy: 0.8330 Epoch 30/30 625/625 [==============================] - 4s 7ms/step - loss: 2.1722e-05 - accuracy: 1.0000 - val_loss: 1.2250 - val_accuracy: 0.8324
In [78]:
nlp_model.evaluate(test_padding, y_test)
782/782 [==============================] - 3s 3ms/step - loss: 1.2400 - accuracy: 0.8277
Out[78]:
[1.2399687767028809, 0.8277199864387512]
In [79]:
y_predict = nlp_model.predict(test_padding)
print('model guess - ', y_predict[2])
print('test answer - ', y_test[2])
782/782 [==============================] - 3s 4ms/step model guess - [2.7484154e-07] test answer - 1
In [80]:
plt.figure(figsize = (15,5))
plt.plot(nlp_model_history.history['accuracy'], label = 'train_accuracy')
plt.plot(nlp_model_history.history['val_accuracy'], label = 'val_accuracy')
plt.legend()
plt.show()
plt.close()
In [81]:
plt.figure(figsize = (15,5))
plt.plot(nlp_model_history.history['loss'], label = 'train_loss')
plt.plot(nlp_model_history.history['val_loss'], label = 'val_loss')
plt.legend()
plt.show()
plt.close()
In [82]:
print('accuracy - ', accuracy_score(y_test, y_predict))
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-82-00561407af8d> in <cell line: 1>() ----> 1 print('accuracy - ', accuracy_score(y_test, y_predict)) /usr/local/lib/python3.10/dist-packages/sklearn/utils/_param_validation.py in wrapper(*args, **kwargs) 190 191 try: --> 192 return func(*args, **kwargs) 193 except InvalidParameterError as e: 194 # When the function is just a wrapper around an estimator, we allow /usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py in accuracy_score(y_true, y_pred, normalize, sample_weight) 219 220 # Compute accuracy for each possible representation --> 221 y_type, y_true, y_pred = _check_targets(y_true, y_pred) 222 check_consistent_length(y_true, y_pred, sample_weight) 223 if y_type.startswith("multilabel"): /usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py in _check_targets(y_true, y_pred) 93 94 if len(y_type) > 1: ---> 95 raise ValueError( 96 "Classification metrics can't handle a mix of {0} and {1} targets".format( 97 type_true, type_pred ValueError: Classification metrics can't handle a mix of binary and continuous targets
In [83]:
!pip install -q tensorflow-datasets
In [30]:
import tensorflow_datasets as tfds
imdb, info = tfds.load('imdb_reviews', with_info = True, as_supervised = True)
In [31]:
train_data, test_data = imdb['train'], imdb['test']
In [32]:
print(type(train_data))
<class 'tensorflow.python.data.ops.prefetch_op._PrefetchDataset'>
In [33]:
train_sentences = []
train_labels = []
test_sentences = []
test_labels = []
for s , l in train_data :
train_sentences.append(str(s.numpy()))
train_labels.append(l.numpy())
for s , l in test_data :
test_sentences.append(str(s.numpy()))
test_labels.append(l.numpy())
In [34]:
print(test_sentences[0])
print(test_labels[0])
b"There are films that make careers. For George Romero, it was NIGHT OF THE LIVING DEAD; for Kevin Smith, CLERKS; for Robert Rodriguez, EL MARIACHI. Add to that list Onur Tukel's absolutely amazing DING-A-LING-LESS. Flawless film-making, and as assured and as professional as any of the aforementioned movies. I haven't laughed this hard since I saw THE FULL MONTY. (And, even then, I don't think I laughed quite this hard... So to speak.) Tukel's talent is considerable: DING-A-LING-LESS is so chock full of double entendres that one would have to sit down with a copy of this script and do a line-by-line examination of it to fully appreciate the, uh, breadth and width of it. Every shot is beautifully composed (a clear sign of a sure-handed director), and the performances all around are solid (there's none of the over-the-top scenery chewing one might've expected from a film like this). DING-A-LING-LESS is a film whose time has come." 1
In [35]:
print('type - ', type(train_labels))
train_labels = np.array(train_labels)
test_labels = np.array(test_labels)
type - <class 'list'>
In [36]:
print('type - ', type(train_labels))
type - <class 'numpy.ndarray'>
In [1]:
vocab_size = 1000
oov_token = '<00V>'
tokenizer = Tokenizer(num_words = vocab_size, oov_token=oov_token)
tokenizer.fit_on_texts(train_sentences)
word_index = tokenizer.word_index
#print(word_index)
print()
print()
train = tokenizer.texts_to_sequences(train_sentences)
train_padding = pad_sequences(train, padding = 'pre', maxlen= 120 )
test = tokenizer.texts_to_sequences(test_sentences)
test_padding = pad_sequences(test, padding = 'pre', maxlen= 120 )
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[1], line 5 1 vocab_size = 1000 2 oov_token = '<00V>' ----> 5 tokenizer = Tokenizer(num_words = vocab_size, oov_token=oov_token) 6 tokenizer.fit_on_texts(train_sentences) 7 word_index = tokenizer.word_index NameError: name 'Tokenizer' is not defined
In [24]:
train_padding[0]
Out[24]:
array([ 0, 0, 59, 12, 14, 35, 439, 400, 18, 174, 29, 1, 9, 33, 1, 1, 42, 496, 1, 197, 25, 88, 156, 19, 12, 211, 340, 29, 70, 248, 213, 9, 486, 62, 70, 88, 116, 99, 24, 1, 12, 1, 657, 777, 12, 18, 7, 35, 406, 1, 178, 1, 426, 2, 92, 1, 140, 72, 149, 55, 2, 1, 1, 72, 229, 70, 1, 16, 1, 1, 1, 1, 1, 1, 3, 40, 1, 119, 1, 17, 1, 14, 163, 19, 4, 1, 927, 1, 9, 4, 18, 13, 14, 1, 5, 102, 148, 1, 11, 240, 692, 13, 44, 25, 101, 39, 12, 1, 1, 39, 1, 1, 52, 409, 11, 99, 1, 874, 145, 10], dtype=int32)
In [25]:
train_labels[0]
Out[25]:
0
In [38]:
nlp_model = Sequential()
#입력층
#input_dim 학습데이터에서 사용하게 되는 인자의 개수
#output_dim 임베딩 벡터의 크기
nlp_model.add(Embedding(input_dim = 1000 , output_dim = 64, input_length = 120))
nlp_model.add(Flatten())
#은닉층
nlp_model.add(Dense(units = 10 ))
nlp_model.add(Activation('relu'))
#출력층
nlp_model.add(Dense(units = 1 ))
nlp_model.add(Activation('sigmoid'))
nlp_model.compile(loss = 'binary_crossentropy',metrics = ['accuracy'],optimizer = 'adam')
In [39]:
nlp_model_history = nlp_model.fit(train_padding, train_labels,
batch_size = 150,
epochs = 50 ,
validation_split = 0.2,
verbose = 1)
Epoch 1/50 134/134 [==============================] - 3s 19ms/step - loss: 0.5979 - accuracy: 0.6694 - val_loss: 0.4161 - val_accuracy: 0.8132 Epoch 2/50 134/134 [==============================] - 2s 16ms/step - loss: 0.3543 - accuracy: 0.8449 - val_loss: 0.3724 - val_accuracy: 0.8348 Epoch 3/50 134/134 [==============================] - 2s 12ms/step - loss: 0.2732 - accuracy: 0.8895 - val_loss: 0.3895 - val_accuracy: 0.8248 Epoch 4/50 134/134 [==============================] - 2s 12ms/step - loss: 0.1949 - accuracy: 0.9339 - val_loss: 0.4507 - val_accuracy: 0.8030 Epoch 5/50 134/134 [==============================] - 2s 15ms/step - loss: 0.1186 - accuracy: 0.9728 - val_loss: 0.4688 - val_accuracy: 0.8124 Epoch 6/50 134/134 [==============================] - 2s 17ms/step - loss: 0.0632 - accuracy: 0.9924 - val_loss: 0.5244 - val_accuracy: 0.8076 Epoch 7/50 134/134 [==============================] - 2s 16ms/step - loss: 0.0335 - accuracy: 0.9979 - val_loss: 0.5773 - val_accuracy: 0.8068 Epoch 8/50 134/134 [==============================] - 3s 21ms/step - loss: 0.0199 - accuracy: 0.9988 - val_loss: 0.6222 - val_accuracy: 0.8076 Epoch 9/50 134/134 [==============================] - 2s 14ms/step - loss: 0.0132 - accuracy: 0.9991 - val_loss: 0.6595 - val_accuracy: 0.8060 Epoch 10/50 134/134 [==============================] - 2s 14ms/step - loss: 0.0093 - accuracy: 0.9992 - val_loss: 0.7001 - val_accuracy: 0.8046 Epoch 11/50 134/134 [==============================] - 2s 14ms/step - loss: 0.0065 - accuracy: 0.9995 - val_loss: 0.7286 - val_accuracy: 0.8040 Epoch 12/50 134/134 [==============================] - 2s 15ms/step - loss: 0.0046 - accuracy: 0.9997 - val_loss: 0.7621 - val_accuracy: 0.8028 Epoch 13/50 134/134 [==============================] - 2s 14ms/step - loss: 0.0032 - accuracy: 0.9999 - val_loss: 0.7904 - val_accuracy: 0.8048 Epoch 14/50 134/134 [==============================] - 3s 20ms/step - loss: 0.0024 - accuracy: 0.9999 - val_loss: 0.8116 - val_accuracy: 0.8052 Epoch 15/50 134/134 [==============================] - 2s 13ms/step - loss: 0.0019 - accuracy: 0.9999 - val_loss: 0.8360 - val_accuracy: 0.8046 Epoch 16/50 134/134 [==============================] - 2s 12ms/step - loss: 0.0015 - accuracy: 1.0000 - val_loss: 0.8547 - val_accuracy: 0.8038 Epoch 17/50 134/134 [==============================] - 2s 13ms/step - loss: 0.0012 - accuracy: 1.0000 - val_loss: 0.8732 - val_accuracy: 0.8036 Epoch 18/50 134/134 [==============================] - 2s 17ms/step - loss: 9.9431e-04 - accuracy: 1.0000 - val_loss: 0.8920 - val_accuracy: 0.8042 Epoch 19/50 134/134 [==============================] - 2s 12ms/step - loss: 8.4676e-04 - accuracy: 1.0000 - val_loss: 0.9094 - val_accuracy: 0.8040 Epoch 20/50 134/134 [==============================] - 2s 13ms/step - loss: 7.2120e-04 - accuracy: 1.0000 - val_loss: 0.9248 - val_accuracy: 0.8044 Epoch 21/50 134/134 [==============================] - 3s 22ms/step - loss: 6.2505e-04 - accuracy: 1.0000 - val_loss: 0.9406 - val_accuracy: 0.8042 Epoch 22/50 134/134 [==============================] - 2s 14ms/step - loss: 5.4221e-04 - accuracy: 1.0000 - val_loss: 0.9534 - val_accuracy: 0.8030 Epoch 23/50 134/134 [==============================] - 2s 14ms/step - loss: 4.7428e-04 - accuracy: 1.0000 - val_loss: 0.9700 - val_accuracy: 0.8042 Epoch 24/50 134/134 [==============================] - 2s 16ms/step - loss: 4.1528e-04 - accuracy: 1.0000 - val_loss: 0.9827 - val_accuracy: 0.8036 Epoch 25/50 134/134 [==============================] - 2s 13ms/step - loss: 3.6806e-04 - accuracy: 1.0000 - val_loss: 0.9956 - val_accuracy: 0.8030 Epoch 26/50 134/134 [==============================] - 2s 13ms/step - loss: 3.2585e-04 - accuracy: 1.0000 - val_loss: 1.0105 - val_accuracy: 0.8028 Epoch 27/50 134/134 [==============================] - 3s 19ms/step - loss: 2.8941e-04 - accuracy: 1.0000 - val_loss: 1.0198 - val_accuracy: 0.8020 Epoch 28/50 134/134 [==============================] - 2s 14ms/step - loss: 2.5782e-04 - accuracy: 1.0000 - val_loss: 1.0346 - val_accuracy: 0.8024 Epoch 29/50 134/134 [==============================] - 2s 12ms/step - loss: 2.3064e-04 - accuracy: 1.0000 - val_loss: 1.0460 - val_accuracy: 0.8018 Epoch 30/50 134/134 [==============================] - 2s 14ms/step - loss: 2.0710e-04 - accuracy: 1.0000 - val_loss: 1.0590 - val_accuracy: 0.8026 Epoch 31/50 134/134 [==============================] - 2s 12ms/step - loss: 1.8614e-04 - accuracy: 1.0000 - val_loss: 1.0694 - val_accuracy: 0.8024 Epoch 32/50 134/134 [==============================] - 2s 14ms/step - loss: 1.6763e-04 - accuracy: 1.0000 - val_loss: 1.0809 - val_accuracy: 0.8022 Epoch 33/50 134/134 [==============================] - 2s 12ms/step - loss: 1.5116e-04 - accuracy: 1.0000 - val_loss: 1.0924 - val_accuracy: 0.8024 Epoch 34/50 134/134 [==============================] - 3s 20ms/step - loss: 1.3688e-04 - accuracy: 1.0000 - val_loss: 1.1028 - val_accuracy: 0.8020 Epoch 35/50 134/134 [==============================] - 2s 16ms/step - loss: 1.2378e-04 - accuracy: 1.0000 - val_loss: 1.1129 - val_accuracy: 0.8020 Epoch 36/50 134/134 [==============================] - 2s 12ms/step - loss: 1.1230e-04 - accuracy: 1.0000 - val_loss: 1.1238 - val_accuracy: 0.8016 Epoch 37/50 134/134 [==============================] - 2s 15ms/step - loss: 1.0200e-04 - accuracy: 1.0000 - val_loss: 1.1329 - val_accuracy: 0.8016 Epoch 38/50 134/134 [==============================] - 2s 14ms/step - loss: 9.2752e-05 - accuracy: 1.0000 - val_loss: 1.1447 - val_accuracy: 0.8020 Epoch 39/50 134/134 [==============================] - 2s 12ms/step - loss: 8.4532e-05 - accuracy: 1.0000 - val_loss: 1.1547 - val_accuracy: 0.8010 Epoch 40/50 134/134 [==============================] - 2s 18ms/step - loss: 7.7050e-05 - accuracy: 1.0000 - val_loss: 1.1678 - val_accuracy: 0.8030 Epoch 41/50 134/134 [==============================] - 3s 20ms/step - loss: 7.0398e-05 - accuracy: 1.0000 - val_loss: 1.1754 - val_accuracy: 0.8012 Epoch 42/50 134/134 [==============================] - 2s 15ms/step - loss: 6.4110e-05 - accuracy: 1.0000 - val_loss: 1.1858 - val_accuracy: 0.8018 Epoch 43/50 134/134 [==============================] - 2s 12ms/step - loss: 5.8613e-05 - accuracy: 1.0000 - val_loss: 1.1956 - val_accuracy: 0.8016 Epoch 44/50 134/134 [==============================] - 2s 16ms/step - loss: 5.3589e-05 - accuracy: 1.0000 - val_loss: 1.2057 - val_accuracy: 0.8018 Epoch 45/50 134/134 [==============================] - 2s 13ms/step - loss: 4.9153e-05 - accuracy: 1.0000 - val_loss: 1.2144 - val_accuracy: 0.8014 Epoch 46/50 134/134 [==============================] - 2s 16ms/step - loss: 4.4964e-05 - accuracy: 1.0000 - val_loss: 1.2251 - val_accuracy: 0.8014 Epoch 47/50 134/134 [==============================] - 3s 20ms/step - loss: 4.1301e-05 - accuracy: 1.0000 - val_loss: 1.2339 - val_accuracy: 0.8012 Epoch 48/50 134/134 [==============================] - 2s 15ms/step - loss: 3.7808e-05 - accuracy: 1.0000 - val_loss: 1.2439 - val_accuracy: 0.8020 Epoch 49/50 134/134 [==============================] - 2s 14ms/step - loss: 3.4740e-05 - accuracy: 1.0000 - val_loss: 1.2530 - val_accuracy: 0.8012 Epoch 50/50 134/134 [==============================] - 2s 16ms/step - loss: 3.1882e-05 - accuracy: 1.0000 - val_loss: 1.2626 - val_accuracy: 0.8010
In [40]:
plt.figure(figsize = (15,5))
plt.plot(nlp_model_history.history['accuracy'], label = 'train_accuracy')
plt.plot(nlp_model_history.history['val_accuracy'], label = 'val_accuracy')
plt.legend()
plt.show()
plt.close()
In [43]:
print('RNN - ')
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words = 1000)
(X_train.shape, y_train.shape), (X_test.shape, y_test.shape)
RNN -
Out[43]:
(((25000,), (25000,)), ((25000,), (25000,)))
In [44]:
train_padding = pad_sequences(X_train, padding = 'pre', maxlen= 500)
#print(train_padding)
test_padding = pad_sequences(X_test, padding = 'pre', maxlen= 500)
#print(test_padding)
len(train_padding[0])
Out[44]:
500
In [48]:
nlp_model = Sequential()
#입력층
#input_dim 학습데이터에서 사용하게 되는 인자의 개수
#output_dim 임베딩 벡터의 크기
nlp_model.add(Embedding(input_dim = 1000 , output_dim = 32, input_length = 500))
nlp_model.add(SimpleRNN( 32 , return_sequences=True, dropout = 0.15 ))
nlp_model.add(SimpleRNN( 32 , return_sequences=False))
# #출력층
nlp_model.add(Dense(units = 1 ))
nlp_model.add(Activation('sigmoid'))
nlp_model.compile(loss = 'binary_crossentropy',
metrics = ['accuracy'],
optimizer = 'adam')
In [49]:
nlp_model.summary()
Model: "sequential_4" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= embedding_4 (Embedding) (None, 500, 32) 32000 simple_rnn (SimpleRNN) (None, 500, 32) 2080 simple_rnn_1 (SimpleRNN) (None, 32) 2080 dense_6 (Dense) (None, 1) 33 activation_6 (Activation) (None, 1) 0 ================================================================= Total params: 36193 (141.38 KB) Trainable params: 36193 (141.38 KB) Non-trainable params: 0 (0.00 Byte) _________________________________________________________________
In [50]:
nlp_model_history = nlp_model.fit(train_padding, y_train,
batch_size = 32,
epochs = 30 ,
validation_split = 0.2,
verbose = 1)
Epoch 1/30 625/625 [==============================] - 165s 255ms/step - loss: 0.5794 - accuracy: 0.6698 - val_loss: 0.5130 - val_accuracy: 0.7422 Epoch 2/30 625/625 [==============================] - 160s 256ms/step - loss: 0.4473 - accuracy: 0.7980 - val_loss: 0.4577 - val_accuracy: 0.8014 Epoch 3/30 625/625 [==============================] - 158s 254ms/step - loss: 0.4494 - accuracy: 0.7927 - val_loss: 0.4265 - val_accuracy: 0.8196 Epoch 4/30 625/625 [==============================] - 158s 253ms/step - loss: 0.4822 - accuracy: 0.7717 - val_loss: 0.9876 - val_accuracy: 0.5094 Epoch 5/30 625/625 [==============================] - 154s 247ms/step - loss: 0.6099 - accuracy: 0.6580 - val_loss: 0.5660 - val_accuracy: 0.7324 Epoch 6/30 625/625 [==============================] - 159s 254ms/step - loss: 0.5649 - accuracy: 0.7052 - val_loss: 0.6326 - val_accuracy: 0.6306 Epoch 7/30 625/625 [==============================] - 154s 247ms/step - loss: 0.5799 - accuracy: 0.7013 - val_loss: 0.6360 - val_accuracy: 0.6232 Epoch 8/30 625/625 [==============================] - 155s 249ms/step - loss: 0.5316 - accuracy: 0.7344 - val_loss: 0.6430 - val_accuracy: 0.6166 Epoch 9/30 625/625 [==============================] - 157s 251ms/step - loss: 0.5792 - accuracy: 0.6870 - val_loss: 0.6088 - val_accuracy: 0.6694 Epoch 10/30 625/625 [==============================] - 157s 251ms/step - loss: 0.5498 - accuracy: 0.7188 - val_loss: 0.5711 - val_accuracy: 0.7010 Epoch 11/30 625/625 [==============================] - 168s 269ms/step - loss: 0.5423 - accuracy: 0.7186 - val_loss: 0.6236 - val_accuracy: 0.6490 Epoch 12/30 625/625 [==============================] - 201s 321ms/step - loss: 0.5944 - accuracy: 0.6786 - val_loss: 0.6879 - val_accuracy: 0.6300 Epoch 13/30 625/625 [==============================] - 183s 293ms/step - loss: 0.5591 - accuracy: 0.7110 - val_loss: 0.6016 - val_accuracy: 0.6892 Epoch 14/30 625/625 [==============================] - 177s 284ms/step - loss: 0.5608 - accuracy: 0.7071 - val_loss: 0.6291 - val_accuracy: 0.6504 Epoch 15/30 625/625 [==============================] - 191s 305ms/step - loss: 0.5280 - accuracy: 0.7323 - val_loss: 0.5870 - val_accuracy: 0.7038 Epoch 16/30 625/625 [==============================] - 159s 254ms/step - loss: 0.5117 - accuracy: 0.7503 - val_loss: 0.5587 - val_accuracy: 0.7198 Epoch 17/30 625/625 [==============================] - 163s 261ms/step - loss: 0.5294 - accuracy: 0.7390 - val_loss: 0.6012 - val_accuracy: 0.6918 Epoch 18/30 625/625 [==============================] - 161s 257ms/step - loss: 0.5189 - accuracy: 0.7389 - val_loss: 0.5669 - val_accuracy: 0.7208 Epoch 19/30 625/625 [==============================] - 162s 260ms/step - loss: 0.5022 - accuracy: 0.7524 - val_loss: 0.6209 - val_accuracy: 0.6838 Epoch 20/30 625/625 [==============================] - 161s 257ms/step - loss: 0.4642 - accuracy: 0.7790 - val_loss: 0.5582 - val_accuracy: 0.7372 Epoch 21/30 625/625 [==============================] - 162s 259ms/step - loss: 0.4445 - accuracy: 0.7959 - val_loss: 0.6485 - val_accuracy: 0.6854 Epoch 22/30 625/625 [==============================] - 162s 259ms/step - loss: 0.4503 - accuracy: 0.7913 - val_loss: 0.6041 - val_accuracy: 0.6810 Epoch 23/30 625/625 [==============================] - 156s 250ms/step - loss: 0.5253 - accuracy: 0.7340 - val_loss: 0.6452 - val_accuracy: 0.6426 Epoch 24/30 625/625 [==============================] - 156s 249ms/step - loss: 0.4635 - accuracy: 0.7809 - val_loss: 0.6296 - val_accuracy: 0.6770 Epoch 25/30 625/625 [==============================] - 157s 252ms/step - loss: 0.4207 - accuracy: 0.8083 - val_loss: 0.5302 - val_accuracy: 0.7640 Epoch 26/30 625/625 [==============================] - 157s 251ms/step - loss: 0.3882 - accuracy: 0.8282 - val_loss: 0.6233 - val_accuracy: 0.7086 Epoch 27/30 625/625 [==============================] - 155s 248ms/step - loss: 0.3789 - accuracy: 0.8378 - val_loss: 0.5593 - val_accuracy: 0.7494 Epoch 28/30 625/625 [==============================] - 155s 248ms/step - loss: 0.3471 - accuracy: 0.8543 - val_loss: 0.5561 - val_accuracy: 0.7568 Epoch 29/30 625/625 [==============================] - 154s 246ms/step - loss: 0.4426 - accuracy: 0.7891 - val_loss: 0.6404 - val_accuracy: 0.7360 Epoch 30/30 625/625 [==============================] - 159s 254ms/step - loss: 0.3864 - accuracy: 0.8343 - val_loss: 0.6887 - val_accuracy: 0.6218
In [ ]:
print('early stopping - loss 줄어들지 않으면 학습 중지 - ')
user_callback = [EarlyStopping(monitor = 'val_accuracy', patience = 10)]
nlp_model_history = nlp_model.fit(train_padding, y_train,
batch_size = 32,
epochs = 5 ,
validation_split = 0.2,
verbose = 1,
callbacks = [user_callback])
In [ ]:
print('모델 저장과 모델 복원 - ')
nlp_model.save('model.h5')
In [ ]:
load_nlp_model = load_model('model.h5')
'데이터 분석 > 머신러닝' 카테고리의 다른 글
머신러닝 chatbot 생성하기 (1) | 2023.12.05 |
---|---|
코랩을 이용한 머신러닝 기초 day06 (0) | 2023.11.08 |
코랩을 이용한 머신러닝 기초 day05 (0) | 2023.11.08 |
코랩을 이용한 머신러닝 기초 day04 (0) | 2023.11.08 |
코랩을 이용한 머신러닝 기초 day03 (0) | 2023.11.01 |