인공지능 개발자 수다(유튜브 바로가기) 자세히보기

자격증

[빅데이터분석기사 실기] 함수 모음집 - 유형 2

Suda_777 2024. 5. 30. 00:58

목차

    반응형

    1. 시험 설명

      • 유형 2
      • 문제 수: 1문제 (40점)
      • 주제: 데이터 분석 (분류/회귀/비지도학습)

     

    2. 암기할 클래스 모음

    • tree 모델
    from sklearn.tree import DecisionTreeClassifier
    model = DecisionTreeClassifier(random_state=1,
                                   max_depth=10,
                                   min_samples_split=2,
                                   min_samples_leaf=1)
    
    
    from sklearn.tree import DecisionTreeRegressor
    model = DecisionTreeRegressor(random_state=1,
                                 max_depth=10,
                                 min_samples_split=2,
                                 min_samples_leaf=1)
    
    
    from sklearn.ensemble import RandomForestClassifier
    model = RandomForestClassifier(random_state=1,
                                   n_estimators=3,
                                   max_depth=10,
                                   min_samples_split=2,
                                   min_samples_leaf=1)
                                   
                                   
    from sklearn.ensemble import RandomForestRegressor
    model = RandomForestRegressor(random_state=1,
                                  n_estimators=3,
                                  max_depth=10,
                                  min_samples_split=2,
                                  min_samples_leaf=1)
    
    
    import xgboost as xgb
    model = xgb.XGBRegressor(random_state=1,
                             n_estimators=3,
                             max_depth=10,
                             min_child_weight=1)
    
    
    import xgboost as xgb
    model = xgb.XGBClassifier(random_state=1,
                             n_estimators=20,
                             max_depth=100,
                             min_child_weight=1)
    
    
    import lightgbm as lgb
    model = lgb.LGBMRegressor(random_state=1,
                             n_estimators=3,
                             max_depth=10,
                             num_leaves=2,
                             min_child_samples=1)
    
    
    import lightgbm as lgb
    model = lgb.LGBMClassifier(random_state=1,
                             n_estimators=3,
                             max_depth=10,
                             num_leaves=2,
                             min_child_samples=1)
    
    
    from sklearn.ensemble import ExtraTreesClassifier
    from sklearn.ensemble import ExtraTreesRegressor
    
    from sklearn.ensemble import GradientBoostingClassifier
    from sklearn.ensemble import GradientBoostingRegressor
    
    from sklearn.ensemble import AdaBoostClassifier
    from sklearn.ensemble import AdaBoostRegressor

     

    • 선형 모델
    from sklearn.linear_model import LogisticRegression
    model = LogisticRegression(
        penalty='l2',         # 규제의 유형: 'l1', 'l2', 'elasticnet', None (기본값은 'l2'; 문자열 'none'은 최신 scikit-learn에서 제거됨)
        C=1.0,                # 규제 강도, 값이 작을수록 강한 규제 (기본값은 1.0)
        max_iter=100,         # 최대 반복 횟수 (기본값은 100)
        random_state=1,       # 난수 시드 (재현 가능성을 위해 설정)
        solver='lbfgs',       # 최적화 알고리즘: 'newton-cg', 'lbfgs',
        					  # 'liblinear', 'sag', 'saga' (기본값은 'lbfgs')
    )
    
    
    from sklearn.linear_model import LinearRegression
    model = LinearRegression()
    
    
    from sklearn.linear_model import Ridge
    model = Ridge(alpha=1.0) # 규제 강도: 0 이상의 값, 기본값은 1.0
    
    
    from sklearn.linear_model import Lasso
    model = Lasso(alpha=1.0)
    
    
    from sklearn.linear_model import ElasticNet
    model = ElasticNet(alpha=1, # 규제 강도
    	l1_ratio=0.5) # L1 규제의 비율 (0이면 L2만, 1이면 L1만 적용)
    • Support Vector Machine (SVM)
    from sklearn.svm import SVC
    model = SVC(C=1.0, # 규제 매개변수: 값이 작을수록 규제가 강해져 학습 오류를 더 허용
    	kernel='rbf', # linear, poly, sigmoid
        gamma='scale', # 커널의 계수, rbf/poly/sigmoid에서 사용
        probability=True) # 확률 추정
    
    from sklearn.svm import LinearSVC
    model = LinearSVC(C=1.0)
    
    
    from sklearn.svm import SVR
    model = SVR(C=1.0, 
    	kernel='rbf', 
        gamma='scale')
    
    
    from sklearn.svm import LinearSVR
    model = LinearSVR(C=1.0)

     

    • K-최근접 이웃 (KNN)
    from sklearn.neighbors import KNeighborsClassifier
    model = KNeighborsClassifier(n_neighbors=3)  # 최근접 이웃 3개 사용
    
    from sklearn.neighbors import KNeighborsRegressor
    model = KNeighborsRegressor(n_neighbors=3)  # 최근접 이웃 3개 사용

     

    • 기타 분류 모델
    from sklearn.naive_bayes import GaussianNB
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis # 선형 판별 분석
    from sklearn.neural_network import MLPClassifier # 다층 퍼셉트론

     

    • 기타 회귀 모델
    from sklearn.neural_network import MLPRegressor

     

    • 비지도 학습 - 군집화(Clustering)
    from sklearn.cluster import KMeans
    model = KMeans(n_clusters=3)  # 3개의 군집으로 분류
    
    
    from sklearn.cluster import DBSCAN # 밀도 기반 군집화
    model = DBSCAN(eps=0.5, min_samples=5)  # eps: 이웃으로 간주할 최대 거리 0.5, min_samples: 핵심 포인트가 되기 위한 최소 샘플 5개
    
    
    from sklearn.cluster import AgglomerativeClustering # 계층적 군집화
    model = AgglomerativeClustering(n_clusters=3, # 3개의 군집
    	linkage='ward') # Ward 연결 방식

     

     

      • 모델 검증
    from sklearn.model_selection import train_test_split
    X_train, X_test, y_train, y_test = train_test_split(
    	X, y, test_size=0.2, shuffle=True, stratify=y, random_state=42
    )
    
    
    from sklearn.model_selection import cross_val_score
    scores = cross_val_score(model, X, y, cv=5, scoring='accuracy')
    
    
    from sklearn.model_selection import GridSearchCV
    param_grid = {  # SVC 기준 예시: 키는 estimator의 하이퍼파라미터 이름과 일치해야 함
        'C': [0.1, 1, 10],
        'kernel': ['linear', 'rbf'],
        'gamma': ['scale', 'auto']
    }
    grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, scoring='accuracy')
    
    
    from sklearn.model_selection import RandomizedSearchCV

     

    • 메트릭
    # 회귀 모델 평가 지표
    from sklearn.metrics import r2_score
    r2 = r2_score(y_true, y_pred)
    
    from sklearn.metrics import log_loss # 로그 손실 (회귀가 아닌 분류 모델의 예측 확률 평가 지표)
    logloss = log_loss(y_true, y_pred_proba) # 변수명을 log_loss로 하면 임포트한 함수가 덮어써지므로 다른 이름 사용
    
    from sklearn.metrics import mean_squared_error
    mse = mean_squared_error(y_true, y_pred)
    
    from math import sqrt
    rmse = sqrt(mse)
    
    # 분류모델 평가지표
    from sklearn.metrics import accuracy_score # 정확도
    accuracy = accuracy_score(y_true, y_pred)
    
    from sklearn.metrics import precision_score # 정밀도
    precision = precision_score(y_true, y_pred)
    
    from sklearn.metrics import recall_score # 재현율
    recall = recall_score(y_true, y_pred)
    
    from sklearn.metrics import f1_score # F1 점수
    f1 = f1_score(y_true, y_pred)
    
    from sklearn.metrics import roc_auc_score # ROC AUC 점수 (이진 분류는 양성 클래스 확률 사용, 예: model.predict_proba(X)[:, 1])
    roc_auc = roc_auc_score(y_true, y_pred_proba)
    
    from sklearn.metrics import confusion_matrix # 혼동 행렬
    confusion = confusion_matrix(y_true, y_pred)
    
    from sklearn.metrics import classification_report # 분류 보고서
    report = classification_report(y_true, y_pred)

     

    • 연관분석
    # 아프리오리 알고리즘
    from mlxtend.frequent_patterns import apriori
    
    frequent_itemsets = apriori(df, min_support=0.5, use_colnames=True)
    print(frequent_itemsets)
    
    
    # 연관 규칙
    from mlxtend.frequent_patterns import association_rules
    
    rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.7)
    print(rules)

     

    3. 암기할 파라미터 정리

    다음 시간에...

    반응형

    '자격증' 카테고리의 다른 글

    [빅데이터분석기사 실기] 함수 모음집 - 유형 1  (0) 2024.05.28