Hi, I'm new to deep learning. I'm trying to classify DDoS attacks using the NSL-KDD dataset. Training works, but evaluating the model on the separate test dataset fails with the error below. Can you help me find the mistake in my testing step?
Full notebook: ANN.ipynb on GitHub.
```
ValueError: Data cardinality is ambiguous:
  x sizes: 22544
  y sizes: 125973
Make sure all arrays contain the same number of samples.
```
What I have tried:
```python
import pandas as pd
import numpy as np
import sklearn
from sklearn.utils import shuffle
from sklearn.metrics import *
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
import warnings
from keras.preprocessing import sequence
from keras import optimizers
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Embedding, SimpleRNN, BatchNormalization
from keras.models import model_from_json
warnings.filterwarnings("ignore")
%matplotlib inline
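# Load the NSL-KDD field names and use them as column headers for both files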
column_name = pd.read_csv("../Field Names.csv", header = None)
new_columns = list(column_name[0].values)
new_columns += ['class', 'difficulty']
train_data = pd.read_csv('../KDDTrain+.txt', names = new_columns)
test_data = pd.read_csv('../KDDTest+.txt', names = new_columns)
print("The training data is")
train_data.tail()
print(f"The shape of the training dataframe is : {train_data.shape}")
print("The testing data is")
test_data.head()
print(f"The shape of the testing dataframe is : {test_data.shape}")
map_attacks = [x.strip().split() for x in open('../attacks.txt', 'r')]
map_attacks = {k:v for (k,v) in map_attacks}
train_data['class'] = train_data['class'].replace(map_attacks)
test_data['class'] = test_data['class'].replace(map_attacks)
train_data = shuffle(train_data)
X = train_data.drop('class', axis = 1)
y = train_data['class']
columns = ['protocol_type', 'service', 'flag']
X_new = pd.get_dummies(X, columns = columns, drop_first = True)
X_new
y_new = train_data['class']
y_new = pd.get_dummies(y_new)
X_train, X_test, y_train, y_test = train_test_split(X_new, y_new, test_size=0.2, random_state=101)
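# Fit the scaler on the training split only, then apply the same statistics to both splits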
sc = StandardScaler()
sc.fit(np.array(X_train))
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)
import tensorflow as tf
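# input_dim = 120 must match the number of columns in X_new after one-hot encoding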
model = Sequential()
model.add(Dense(32, input_dim = 120, activation = "relu", kernel_initializer = "lecun_normal"))
model.add(BatchNormalization())
model.add(Dropout(0.2))
model.add(Dense(32, activation = "relu"))
model.add(Dense(5, activation = "softmax"))
model.summary()
optim = tf.keras.optimizers.Adam(learning_rate = 0.0001)  # 'lr' is deprecated in recent TF/Keras
model.compile(loss = 'categorical_crossentropy', optimizer = optim, metrics = ['accuracy'])
history = model.fit(X_train, y_train,
batch_size = 32,
epochs = 20,
validation_data = (X_test, y_test))
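# This is the call that raises the ValueError above: test_data is the raw test
# dataframe (22,544 rows) while X_new holds the encoded *training* features
# (125,973 rows), so evaluate() sees x and y with different sample counts.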
model.evaluate(test_data, X_new)
classes_predict = model.predict(X_test)
```
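For context, here is roughly what I think the evaluation step should look like instead. The `X_eval`/`y_eval` names, the `reindex` column alignment, and reusing the fitted scaler are my guesses about how to keep the test columns consistent with the training columns, so please correct me if that part is wrong:

```python
# My guess at the fix: prepare KDDTest+ exactly like the training data,
# then call evaluate() with matching (x, y) arrays.
X_eval = test_data.drop('class', axis = 1)
X_eval = pd.get_dummies(X_eval, columns = ['protocol_type', 'service', 'flag'], drop_first = True)
# Align test columns with the training columns; categories that never appear
# in the test file are filled with 0.
X_eval = X_eval.reindex(columns = X_new.columns, fill_value = 0)
X_eval = sc.transform(X_eval)  # reuse the scaler fitted on the training split

y_eval = pd.get_dummies(test_data['class'])
# Same label column order as training; a test class unseen in training
# would end up as an all-zero row here.
y_eval = y_eval.reindex(columns = y_new.columns, fill_value = 0)

model.evaluate(X_eval, y_eval)
```

Is this the right way to do it, or am I still misunderstanding what `evaluate()` expects?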