Just some code I wrote after taking the 統測 (the vocational college entrance exam).
Dataset: Link
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder

data = pd.read_csv("/content/ds_salaries.csv")

# Label-encode the categorical columns only; salary_in_usd is already numeric
categorical = ["experience_level", "employment_type", "company_size"]
le = LabelEncoder()
for col in categorical:
    data[col] = le.fit_transform(data[col])

# Target: salary. It must not also appear in the features, otherwise the
# model just reads the answer off the input. Note that salary_in_usd is
# basically the target converted to USD, so the score will look optimistic.
y = data["salary"]
x = data[categorical + ["salary_in_usd"]]

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=1)

model = RandomForestRegressor(n_estimators=200, random_state=1)
model.fit(x_train, y_train)
print(model.score(x_test, y_test))  # R^2 on the held-out 20%
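LabelEncoder imposes an arbitrary order on categories like employment_type; trees usually tolerate that, but one-hot encoding avoids the fake ordering entirely. A minimal sketch of that alternative with pd.get_dummies (same CSV and columns assumed as above):

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor

data = pd.read_csv("/content/ds_salaries.csv")
categorical = ["experience_level", "employment_type", "company_size"]

# One-hot encode: each category becomes its own 0/1 column, no implied ranking
x = pd.get_dummies(data[categorical])
y = data["salary"]

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=1)
model = RandomForestRegressor(n_estimators=200, random_state=1)
model.fit(x_train, y_train)
print(model.score(x_test, y_test))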
Dataset: Link
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder

train_data = pd.read_csv("train.csv")
test_data = pd.read_csv("test.csv")

features = ["Sex", "Age", "Ticket", "Cabin"]

# Fit each encoder on train + test together so both splits share one mapping
# (fit_transform on each separately gives inconsistent codes); cast to str so
# NaN (e.g. missing Cabin) becomes an ordinary category instead of crashing.
for col in features:
    le = LabelEncoder()
    le.fit(pd.concat([train_data[col], test_data[col]]).astype(str))
    train_data[col] = le.transform(train_data[col].astype(str))
    test_data[col] = le.transform(test_data[col].astype(str))

x_train = train_data[features]
y_train = train_data["Survived"]
x_test = test_data[features]

model = RandomForestClassifier(n_estimators=200, random_state=1)
model.fit(x_train, y_train)
prediction = model.predict(x_test)

submission = pd.DataFrame({"PassengerId": test_data["PassengerId"], "Survived": prediction})
submission.to_csv("./submission.csv", index=False)
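Since test.csv has no labels, there is no way to score locally before uploading. One rough sanity check is cross-validation on the training set; a minimal sketch reusing the model and x_train/y_train above:

from sklearn.model_selection import cross_val_score

# 5-fold accuracy on the training data as a rough proxy for leaderboard score
scores = cross_val_score(model, x_train, y_train, cv=5)
print(scores.mean(), scores.std())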
Dataset: Link
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn import metrics

data_train = pd.read_csv("SMSCollection.csv", encoding="ISO-8859-1")

x_train, x_test, y_train, y_test = train_test_split(
    data_train.sms, data_train.Class, test_size=0.2, random_state=1)

# Fit the vectorizer on the training text only, then reuse the same vocabulary
# on the test set. Calling fit_transform on x_test (as the original did) builds
# a different vocabulary and misaligns the features.
vectorizer = TfidfVectorizer(max_features=2000)
x_train = vectorizer.fit_transform(x_train)
x_test = vectorizer.transform(x_test)

clf = MultinomialNB().fit(x_train, y_train)
y_pred = clf.predict(x_test)
print(metrics.accuracy_score(y_test, y_pred))
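The fit/transform split above is easy to get wrong; wrapping the vectorizer and classifier in a Pipeline lets sklearn handle it automatically. A minimal sketch, assuming x_text_train / x_text_test hold the raw SMS strings straight out of train_test_split (the names are mine, not from the original):

from sklearn.pipeline import make_pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB

# The pipeline fits the vectorizer only during fit() and only transforms at
# predict time, so the vocabulary can never leak or mismatch between splits
pipe = make_pipeline(TfidfVectorizer(max_features=2000), MultinomialNB())
pipe.fit(x_text_train, y_train)
print(pipe.score(x_text_test, y_test))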
Dataset: Link
from google.colab import files
import os

# Upload kaggle.json once, then pull the dataset via the Kaggle API
if not os.path.exists("kaggle.json"):
    uploaded = files.upload()  # upload kaggle.json here
    os.mkdir("/root/.kaggle/")
    with open("/root/.kaggle/kaggle.json", "wb") as f:
        f.write(uploaded["kaggle.json"])
!chmod 600 ~/.kaggle/kaggle.json
!kaggle datasets download -d puneet6060/intel-image-classification --unzip

import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import (Input, Add, Dense, Activation, ZeroPadding2D,
                                     BatchNormalization, Flatten, Conv2D,
                                     AveragePooling2D, MaxPooling2D)
from tensorflow.keras.initializers import glorot_uniform
from tensorflow.keras.models import Model

train_data_dir = "/content/seg_train"
test_data_dir = "/content/seg_test"

image_generator = ImageDataGenerator(rescale=1/255, validation_split=0.2)
train_data = image_generator.flow_from_directory(batch_size=32,
                                                 directory=train_data_dir,
                                                 target_size=(224, 224),
                                                 subset="training",
                                                 class_mode="categorical")
# subset must be "validation" here; with subset="training" (as in the original)
# val_data is just a second copy of the training split
val_data = image_generator.flow_from_directory(batch_size=32,
                                               directory=train_data_dir,
                                               target_size=(224, 224),
                                               subset="validation",
                                               class_mode="categorical")
test_gen = ImageDataGenerator(rescale=1/255)
test_data = test_gen.flow_from_directory(test_data_dir,
                                         target_size=(224, 224),
                                         batch_size=32,
                                         shuffle=False)

print(train_data.class_indices)
class_names = np.unique(train_data.classes)

def identity_block(x, f, filters):
    # Shortcut passes through unchanged, so input and output shapes must match
    F1, F2, F3 = filters
    x_shortcut = x
    x = Conv2D(filters=F1, kernel_size=(1, 1), strides=(1, 1), padding="valid",
               kernel_initializer=glorot_uniform(seed=1))(x)
    x = BatchNormalization(axis=3)(x)
    x = Activation("relu")(x)
    # The middle conv is the f x f one; the original hard-coded (1,1) here,
    # which silently ignored the f argument
    x = Conv2D(filters=F2, kernel_size=(f, f), strides=(1, 1), padding="same",
               kernel_initializer=glorot_uniform(seed=1))(x)
    x = BatchNormalization(axis=3)(x)
    x = Activation("relu")(x)
    x = Conv2D(filters=F3, kernel_size=(1, 1), strides=(1, 1), padding="valid",
               kernel_initializer=glorot_uniform(seed=1))(x)
    x = BatchNormalization(axis=3)(x)
    x = Add()([x, x_shortcut])
    x = Activation("relu")(x)
    return x

def convolutional_block(x, f, filters, s):
    # Shortcut is projected with a strided 1x1 conv so shapes match at the Add
    F1, F2, F3 = filters
    x_shortcut = x
    x = Conv2D(F1, (1, 1), strides=(s, s), kernel_initializer=glorot_uniform(seed=1))(x)
    x = BatchNormalization(axis=3)(x)
    x = Activation("relu")(x)
    x = Conv2D(F2, (f, f), strides=(1, 1), padding="same",
               kernel_initializer=glorot_uniform(seed=1))(x)
    x = BatchNormalization(axis=3)(x)
    x = Activation("relu")(x)
    x = Conv2D(F3, (1, 1), strides=(1, 1), padding="valid",
               kernel_initializer=glorot_uniform(seed=1))(x)
    x = BatchNormalization(axis=3)(x)
    x_shortcut = Conv2D(F3, (1, 1), strides=(s, s), padding="valid",
                        kernel_initializer=glorot_uniform(seed=1))(x_shortcut)
    x_shortcut = BatchNormalization(axis=3)(x_shortcut)
    x = Add()([x, x_shortcut])
    x = Activation("relu")(x)
    return x

def ResNet50(input_shape=(224, 224, 3), classes=6):
    x_input = Input(input_shape)
    x = ZeroPadding2D((3, 3))(x_input)

    # Stage 1
    x = Conv2D(64, (7, 7), strides=(2, 2), kernel_initializer=glorot_uniform(seed=1))(x)
    x = BatchNormalization(axis=3)(x)
    x = Activation("relu")(x)
    x = MaxPooling2D((3, 3), strides=(2, 2))(x)

    # Stage 2
    x = convolutional_block(x, f=3, filters=[64, 64, 256], s=1)
    x = identity_block(x, 3, [64, 64, 256])
    x = identity_block(x, 3, [64, 64, 256])

    # Stage 3
    x = convolutional_block(x, f=3, filters=[128, 128, 512], s=2)
    x = identity_block(x, 3, [128, 128, 512])
    x = identity_block(x, 3, [128, 128, 512])
    x = identity_block(x, 3, [128, 128, 512])

    # Stage 4
    x = convolutional_block(x, f=3, filters=[256, 256, 1024], s=2)
    x = identity_block(x, 3, [256, 256, 1024])
    x = identity_block(x, 3, [256, 256, 1024])
    x = identity_block(x, 3, [256, 256, 1024])
    x = identity_block(x, 3, [256, 256, 1024])
    x = identity_block(x, 3, [256, 256, 1024])

    # Stage 5
    x = convolutional_block(x, f=3, filters=[512, 512, 2048], s=2)
    x = identity_block(x, 3, [512, 512, 2048])
    x = identity_block(x, 3, [512, 512, 2048])

    x = AveragePooling2D()(x)
    x = Flatten()(x)
    # Use the classes argument instead of a hard-coded 6
    x = Dense(classes, activation="softmax", kernel_initializer=glorot_uniform(seed=1))(x)

    model = Model(inputs=x_input, outputs=x)
    return model

model = ResNet50()
model.summary()
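The script stops at summary() and never trains the network. A minimal training sketch under assumed hyper-parameters (Adam and 10 epochs are my choices, not from the original; a ResNet50 trained from scratch usually needs far more):

# Categorical cross-entropy matches the class_mode="categorical" generators
model.compile(optimizer="adam",
              loss="categorical_crossentropy",
              metrics=["accuracy"])

# epochs=10 is an assumed placeholder value
history = model.fit(train_data, validation_data=val_data, epochs=10)

# test_data was built with shuffle=False, so order is stable for evaluation
print(model.evaluate(test_data))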