목차
반응형
1. 시험 설명
-
- 유형 2
- 문제 수: 1문제 (40점)
- 주제: 데이터 분석 (분류/회귀/비지도학습)
2. 암기할 클래스 모음
- tree 모델
# --- Decision trees ---
from sklearn.tree import DecisionTreeClassifier
model = DecisionTreeClassifier(random_state=1,  # seed for reproducible splits
max_depth=10,  # maximum depth of the tree
min_samples_split=2,  # minimum samples required to split an internal node
min_samples_leaf=1)  # minimum samples required at a leaf node
from sklearn.tree import DecisionTreeRegressor
model = DecisionTreeRegressor(random_state=1,
max_depth=10,
min_samples_split=2,
min_samples_leaf=1)
# --- Random forests (bagged tree ensembles) ---
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier(random_state=1,
n_estimators=3,  # number of trees in the forest
max_depth=10,
min_samples_split=2,
min_samples_leaf=1)
from sklearn.ensemble import RandomForestRegressor
model = RandomForestRegressor(random_state=1,
n_estimators=3,
max_depth=10,
min_samples_split=2,
min_samples_leaf=1)
# --- Gradient-boosted trees: XGBoost ---
import xgboost as xgb
model = xgb.XGBRegressor(random_state=1,
n_estimators=3,
max_depth=10,
min_child_weight=1)  # minimum sum of instance weights needed in a child
import xgboost as xgb
model = xgb.XGBClassifier(random_state=1,
n_estimators=20,
max_depth=100,
min_child_weight=1)
# --- Gradient-boosted trees: LightGBM ---
import lightgbm as lgb
model = lgb.LGBMRegressor(random_state=1,
n_estimators=3,
max_depth=10,
num_leaves=2,  # maximum number of leaves per tree
min_child_samples=1)  # minimum samples required in a leaf
import lightgbm as lgb
model = lgb.LGBMClassifier(random_state=1,
n_estimators=3,
max_depth=10,
num_leaves=2,
min_child_samples=1)
# Other tree ensembles available in scikit-learn
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import AdaBoostRegressor
- 선형 모델
# Logistic regression (linear classifier)
from sklearn.linear_model import LogisticRegression
model = LogisticRegression(
penalty='l2', # regularization type: 'l1', 'l2', 'elasticnet', 'none' (default 'l2')
C=1.0, # inverse regularization strength; smaller values mean stronger regularization (default 1.0)
max_iter=100, # maximum number of solver iterations (default 100)
random_state=1, # random seed (set for reproducibility)
solver='lbfgs', # optimization algorithm: 'newton-cg', 'lbfgs',
# 'liblinear', 'sag', 'saga' (default 'lbfgs')
)
# Ordinary least squares and its regularized variants
from sklearn.linear_model import LinearRegression
model = LinearRegression()
from sklearn.linear_model import Ridge
model = Ridge(alpha=1.0) # L2 penalty strength; any number greater than 0, 1.0 is the default
from sklearn.linear_model import Lasso
model = Lasso(alpha=1.0)  # L1 penalty strength
from sklearn.linear_model import ElasticNet
model = ElasticNet(alpha=1, # overall regularization strength
l1_ratio=0.5) # mixing ratio between the L1 and L2 penalties
- Support Vector Machine (SVM)
# Support vector classifier
from sklearn.svm import SVC
model = SVC(C=1.0, # regularization parameter; smaller values tolerate more errors
kernel='rbf', # other options: 'linear', 'poly', 'sigmoid'
gamma='scale', # kernel coefficient, used by rbf/poly/sigmoid kernels
probability=True) # enable probability estimates (required for predict_proba)
from sklearn.svm import LinearSVC
model = LinearSVC(C=1.0)
# Support vector regressor
from sklearn.svm import SVR
model = SVR(C=1.0,
kernel='rbf',
gamma='scale')
from sklearn.svm import LinearSVR
model = LinearSVR(C=1.0)
- K-근접
from sklearn.neighbors import KNeighborsClassifier
model = KNeighborsClassifier(n_neighbors=3) # vote among the 3 nearest neighbors
from sklearn.neighbors import KNeighborsRegressor
model = KNeighborsRegressor(n_neighbors=3) # average over the 3 nearest neighbors
- 기타 분류 모델
from sklearn.naive_bayes import GaussianNB # Gaussian naive Bayes
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis # linear discriminant analysis
from sklearn.neural_network import MLPClassifier # multi-layer perceptron classifier
- 기타 회귀 모델
from sklearn.neural_network import MLPRegressor # multi-layer perceptron regressor
- 비지도 학습 - 군집화(Clustering)
from sklearn.cluster import KMeans
model = KMeans(n_clusters=3) # partition the data into 3 clusters
from sklearn.cluster import DBSCAN # density-based clustering
model = DBSCAN(eps=0.5, min_samples=5) # neighborhood radius 0.5, at least 5 samples per core point
from sklearn.cluster import AgglomerativeClustering # hierarchical clustering
model = AgglomerativeClustering(n_clusters=3, # 3 clusters
linkage='ward') # Ward linkage criterion
- 모델 검증
# Hold-out split: 80% train / 20% test, shuffled and stratified by the target
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.2, shuffle=True, stratify=y, random_state=42
)
# 5-fold cross-validation; returns one accuracy score per fold
from sklearn.model_selection import cross_val_score
scores = cross_val_score(model, X, y, cv=5, scoring='accuracy')
# Exhaustive grid search over every combination in param_grid
from sklearn.model_selection import GridSearchCV
param_grid = {
'C': [0.1, 1, 10],
'kernel': ['linear', 'rbf'],
'gamma': ['scale', 'auto']
}
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, scoring='accuracy')
from sklearn.model_selection import RandomizedSearchCV # random-sampling alternative to GridSearchCV
- 메트릭
# Regression metrics
from sklearn.metrics import r2_score
r2 = r2_score(y_true, y_pred)
from sklearn.metrics import log_loss # log loss (a classification metric; needs predicted probabilities)
# NOTE: store the result under a different name — the original
# `log_loss = log_loss(...)` rebinds the imported function to a float
# and breaks any later call to log_loss().
logloss = log_loss(y_true, y_pred_proba)
from sklearn.metrics import mean_squared_error
mse = mean_squared_error(y_true, y_pred)
from math import sqrt
rmse = sqrt(mse) # RMSE = square root of the MSE
# Classification metrics
from sklearn.metrics import accuracy_score # accuracy
accuracy = accuracy_score(y_true, y_pred)
from sklearn.metrics import precision_score # precision
precision = precision_score(y_true, y_pred)
from sklearn.metrics import recall_score # recall
recall = recall_score(y_true, y_pred)
from sklearn.metrics import f1_score # F1 score
f1 = f1_score(y_true, y_pred)
from sklearn.metrics import roc_auc_score # ROC AUC (uses probability scores, not hard labels)
roc_auc = roc_auc_score(y_true, y_pred_proba)
from sklearn.metrics import confusion_matrix # confusion matrix
confusion = confusion_matrix(y_true, y_pred)
from sklearn.metrics import classification_report # per-class precision/recall/F1 report
report = classification_report(y_true, y_pred)
- 연관분석
# 아프리오리 알고리즘
# Apriori algorithm: mine itemsets with support >= 0.5 from a one-hot dataframe
from mlxtend.frequent_patterns import apriori
frequent_itemsets = apriori(df, min_support=0.5, use_colnames=True)
print(frequent_itemsets)
# Derive association rules, keeping those with confidence >= 0.7
from mlxtend.frequent_patterns import association_rules
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.7)
print(rules)
3. 암기할 파라미터 정리
다음 시간에...
반응형
'자격증' 카테고리의 다른 글
[빅데이터분석기사 실기] 함수 모음집 - 유형 1 (0) | 2024.05.28 |
---|