import numpy as np
import pandas as pd
# 导入我自定义的模块
import sys
sys.path.append("./code_libs")
import alpha_tools as at
Checking data_tools dependencies... Checking numpy >= 1.20.3... Found numpy == 1.21.5... Checking pandas >= 1.3.4... Found pandas == 1.3.5... Checking bottleneck >= 1.3.2... Found bottleneck == 1.3.2... Checking graphviz >= 0.19.1... Found graphviz == 0.19.1... Checking scikit-learn >= 1.0.1... Found scikit-learn == 1.0.2... Checking xlrd >= 2.0.1... Found xlrd == 2.0.1... Checking psutil >= 5.8.0... Found psutil == 5.9.0... Checking openpyxl >= 3.0.9... Found openpyxl == 3.0.9... Checking xlsxwriter >= 3.0.2... Found xlsxwriter == 3.0.2... Checking data_tools dependencies finished. Checking tree_tools dependencies... Checking numpy >= 1.20.3... Found numpy == 1.21.5... Checking pandas >= 1.3.4... Found pandas == 1.3.5... Checking bottleneck >= 1.3.2... Found bottleneck == 1.3.2... Checking graphviz >= 0.19.1... Found graphviz == 0.19.1... Checking scikit-learn >= 1.0.1... Found scikit-learn == 1.0.2... Checking xlrd >= 2.0.1... Found xlrd == 2.0.1... Checking psutil >= 5.8.0... Found psutil == 5.9.0... Checking openpyxl >= 3.0.9... Found openpyxl == 3.0.9... Checking xlsxwriter >= 3.0.2... Found xlsxwriter == 3.0.2... Checking scipy >= 1.7.1... Found scipy == 1.7.3... Checking imbalanced-learn >= 0.9.0... Found imbalanced-learn == 0.9.0... Checking bayesian-optimization >= 1.2.0... Found bayesian-optimization == 1.2.0... Checking matplotlib >= 3.4.2... Found matplotlib == 3.5.1... Checking statsmodels >= 0.13.1... Found statsmodels == 0.13.1... Checking lxml >= 4.6.3... Found lxml == 4.7.1... Checking sklearn2pmml >= 0.77.2... Found sklearn2pmml == 0.77.2... Checking sklearn_pandas >= 2.2.0... Found sklearn_pandas == 2.2.0... Checking xgboost >= 1.5.2... Found xgboost == 1.5.2... Checking lightgbm >= 3.3.2... Found lightgbm == 3.3.2... Checking catboost >= 1.0.4... Found catboost == 1.0.4... Checking pikepdf >= 4.3.1... Found pikepdf == 4.3.1... Checking reportlab >= 3.5.68... Found reportlab == 3.6.5... Checking svglib >= 1.1.0... Found svglib == 1.1.0... 
Checking tree_tools dependencies finished. Checking model_tools dependencies... Checking numpy >= 1.20.3... Found numpy == 1.21.5... Checking pandas >= 1.3.4... Found pandas == 1.3.5... Checking bottleneck >= 1.3.2... Found bottleneck == 1.3.2... Checking graphviz >= 0.19.1... Found graphviz == 0.19.1... Checking scikit-learn >= 1.0.1... Found scikit-learn == 1.0.2... Checking xlrd >= 2.0.1... Found xlrd == 2.0.1... Checking psutil >= 5.8.0... Found psutil == 5.9.0... Checking openpyxl >= 3.0.9... Found openpyxl == 3.0.9... Checking xlsxwriter >= 3.0.2... Found xlsxwriter == 3.0.2... Checking scipy >= 1.7.1... Found scipy == 1.7.3... Checking imbalanced-learn >= 0.9.0... Found imbalanced-learn == 0.9.0... Checking bayesian-optimization >= 1.2.0... Found bayesian-optimization == 1.2.0... Checking matplotlib >= 3.4.2... Found matplotlib == 3.5.1... Checking statsmodels >= 0.13.1... Found statsmodels == 0.13.1... Checking lxml >= 4.6.3... Found lxml == 4.7.1... Checking sklearn2pmml >= 0.77.2... Found sklearn2pmml == 0.77.2... Checking sklearn_pandas >= 2.2.0... Found sklearn_pandas == 2.2.0... Checking model_tools dependencies finished.
def _label_from_statuses(statuses):
    """Collapse one customer's STATUS history into a single target label.

    Returns 1 if any status is "2", "3", "4" or "5"; otherwise 2 if any
    status is "1"; otherwise 0.
    """
    seen = set(statuses)
    if seen & {"2", "3", "4", "5"}:
        return 1
    if "1" in seen:
        return 2
    return 0


df_ = pd.read_csv("application_record.csv")
# STATUS is compared against string literals below, so force it to be read
# as text. Left to dtype inference, numeric-looking values could be parsed as
# integers and would then never match, silently labelling those IDs as 0.
df_response = pd.read_csv("credit_record.csv", dtype={"STATUS": str})
# One label per ID; IDs absent from credit_record.csv map to NaN.
df_["target"] = df_["ID"].map(
    df_response.groupby("ID")["STATUS"].apply(_label_from_statuses))
# DAYS_BIRTH is negated before the floor division by 365.
df_['Age'] = -(df_['DAYS_BIRTH'])//365
# Keep only the rows that received a label.
df_data = df_[df_["target"].notnull()]
from sklearn.model_selection import train_test_split
# Add a time-like column: (2021 - Age) followed by the literal "0101".
df_["birth_year"] = [str(2021 - age) + "0101" for age in df_["Age"]]
# Add a sample-weight column: 2 for target == 1, otherwise 1.
df_["weight"] = [2 if label == 1 else 1 for label in df_["target"]]
# Keep only labelled rows, then make a 70/30 split stratified on the target.
has_target = df_["target"].notna()
df_data = df_.loc[has_target]
df_train, df_test = train_test_split(
    df_data,
    test_size=0.3,
    stratify=df_data["target"],
    random_state=42,
)
# First report: only the response column is specified.
at.Analysis.data_flow(
    df_train,
    "./数据分析demo/v1/v1.xlsx",
    test_data=df_test,
    response="target",
)
INFO Prepare kwargs... INFO Checking train_data... INFO Checking response... INFO Checking test_data... INFO Checking response... INFO Checking columns... INFO Checking dtypes... WARNING Using response as split_col_name ! INFO Prepare run... INFO Run: FLAG_MOBIL INFO Run: FLAG_PHONE INFO Run: ID INFO Run: FLAG_WORK_PHONE INFO Run: birth_year WARNING Mixed string numbers birth_year ! INFO Run: weight INFO Run: NAME_HOUSING_TYPE INFO Run: CNT_CHILDREN INFO Run: NAME_FAMILY_STATUS INFO Run: FLAG_OWN_REALTY INFO Run: DAYS_EMPLOYED INFO Run: OCCUPATION_TYPE INFO Run: FLAG_OWN_CAR INFO Run: CODE_GENDER INFO Run: DAYS_BIRTH INFO Run: CNT_FAM_MEMBERS INFO Run: NAME_INCOME_TYPE INFO Run: FLAG_EMAIL INFO Run: Age INFO Run: NAME_EDUCATION_TYPE INFO Run: AMT_INCOME_TOTAL INFO Selecting... INFO Match train: FLAG_PHONE_asC INFO Match train: FLAG_WORK_PHONE_asC INFO Match train: ID_asC INFO Match train: birth_year_asC INFO Match train: CNT_CHILDREN_asC INFO Match train: FLAG_OWN_REALTY_asD INFO Match train: NAME_FAMILY_STATUS_asD INFO Match train: DAYS_EMPLOYED_asC INFO Match train: OCCUPATION_TYPE_asD INFO Match train: FLAG_OWN_CAR_asD INFO Match train: CODE_GENDER_asD INFO Match train: CNT_FAM_MEMBERS_asC INFO Match train: DAYS_BIRTH_asC INFO Match train: NAME_INCOME_TYPE_asD INFO Match train: NAME_EDUCATION_TYPE_asD INFO Match train: Age_asC INFO Match train: AMT_INCOME_TOTAL_asC INFO Creating train data... INFO Saving train data... INFO Creating train corr detail... INFO Creating Random1 corr... INFO Creating Random2 corr... INFO Creating Random3 corr... INFO Creating all corr... INFO Creating train cross table... INFO Creating train all desc tables... INFO Match test: FLAG_PHONE_asC INFO Match test: ID_asC INFO Match test: FLAG_WORK_PHONE_asC INFO Match test: birth_year_asC WARNING Mixed string numbers birth_year_asC ! 
INFO Match test: CNT_CHILDREN_asC INFO Match test: NAME_FAMILY_STATUS_asD INFO Match test: FLAG_OWN_REALTY_asD INFO Match test: DAYS_EMPLOYED_asC INFO Match test: OCCUPATION_TYPE_asD INFO Match test: FLAG_OWN_CAR_asD INFO Match test: CODE_GENDER_asD INFO Match test: DAYS_BIRTH_asC INFO Match test: CNT_FAM_MEMBERS_asC INFO Match test: NAME_INCOME_TYPE_asD INFO Match test: Age_asC INFO Match test: NAME_EDUCATION_TYPE_asD INFO Match test: AMT_INCOME_TOTAL_asC INFO Creating test data... INFO Saving test data... INFO Creating test corr detail... INFO Creating Random1 corr... INFO Creating Random2 corr... INFO Creating Random3 corr... INFO Creating all corr... INFO Creating test cross table... INFO Creating test all desc tables... INFO Match train test: OCCUPATION_TYPE_asD INFO Match train test: FLAG_OWN_REALTY_asD INFO Match train test: DAYS_EMPLOYED_asC INFO Match train test: NAME_FAMILY_STATUS_asD INFO Match train test: NAME_INCOME_TYPE_asD INFO Match train test: ID_asC INFO Match train test: CODE_GENDER_asD INFO Match train test: NAME_EDUCATION_TYPE_asD INFO Match train test: AMT_INCOME_TOTAL_asC INFO Match train test: FLAG_WORK_PHONE_asC INFO Match train test: FLAG_OWN_CAR_asD INFO Match train test: FLAG_PHONE_asC INFO Match train test: DAYS_BIRTH_asC INFO Match train test: CNT_CHILDREN_asC INFO Match train test: Age_asC INFO Match train test: CNT_FAM_MEMBERS_asC INFO Match train test: birth_year_asC INFO Saving info... INFO Saving desc... INFO Saving cross... INFO Saving summary... INFO Saving corr summary... INFO Saving stable_summary... INFO Saving train_stable_iv_ks... INFO Saving train_stable_psi... INFO Saving train_stable_quantile... INFO Saving test_stable_iv_ks... INFO Saving test_stable_psi... INFO Saving test_stable_quantile... INFO Saving stable_cross_psi... INFO Saving detail... INFO Saving draft... INFO Saving drop... INFO Saving json... INFO Saving setting... INFO Adding runtime to excel... INFO Saving directory... INFO Saving... 
INFO Total: 7.56 s INFO Total: 7.58 s
# Inspect the label names currently configured under "gbid_name".
# The notebook displayed: ('good', 'bad', 'ind', 'default')
at.dt.__excel__["gbid_name"]
# Override the label names before generating the "vx" report.
# NOTE(review): "unknow" may be a typo for "unknown" — confirm before
# changing it, since this string is presumably emitted into the report.
at.dt.__excel__["gbid_name"] = ("normal", "react", "unknow", "react")
at.Analysis.data_flow(df_train, "./数据分析demo/vx/vx.xlsx", test_data=df_test, response="target")
INFO Prepare kwargs... INFO Checking train_data... INFO Checking response... INFO Checking test_data... INFO Checking response... INFO Checking columns... INFO Checking dtypes... WARNING Using response as split_col_name ! INFO Prepare run... INFO Run: FLAG_MOBIL INFO Run: FLAG_PHONE INFO Run: ID INFO Run: FLAG_WORK_PHONE INFO Run: birth_year WARNING Mixed string numbers birth_year ! INFO Run: weight INFO Run: NAME_HOUSING_TYPE INFO Run: CNT_CHILDREN INFO Run: NAME_FAMILY_STATUS INFO Run: FLAG_OWN_REALTY INFO Run: DAYS_EMPLOYED INFO Run: OCCUPATION_TYPE INFO Run: FLAG_OWN_CAR INFO Run: CODE_GENDER INFO Run: DAYS_BIRTH INFO Run: CNT_FAM_MEMBERS INFO Run: NAME_INCOME_TYPE INFO Run: FLAG_EMAIL INFO Run: Age INFO Run: NAME_EDUCATION_TYPE INFO Run: AMT_INCOME_TOTAL INFO Selecting... INFO Match train: FLAG_PHONE_asC INFO Match train: FLAG_WORK_PHONE_asC INFO Match train: ID_asC INFO Match train: CNT_CHILDREN_asC INFO Match train: birth_year_asC INFO Match train: FLAG_OWN_REALTY_asD INFO Match train: NAME_FAMILY_STATUS_asD INFO Match train: DAYS_EMPLOYED_asC INFO Match train: OCCUPATION_TYPE_asD INFO Match train: FLAG_OWN_CAR_asD INFO Match train: CNT_FAM_MEMBERS_asC INFO Match train: CODE_GENDER_asD INFO Match train: DAYS_BIRTH_asC INFO Match train: NAME_INCOME_TYPE_asD INFO Match train: NAME_EDUCATION_TYPE_asD INFO Match train: Age_asC INFO Match train: AMT_INCOME_TOTAL_asC INFO Creating train data... INFO Saving train data... INFO Creating train corr detail... INFO Creating Random1 corr... INFO Creating Random2 corr... INFO Creating Random3 corr... INFO Creating all corr... INFO Creating train cross table... INFO Creating train all desc tables... INFO Match test: FLAG_PHONE_asC INFO Match test: ID_asC INFO Match test: FLAG_WORK_PHONE_asC INFO Match test: birth_year_asC WARNING Mixed string numbers birth_year_asC ! 
INFO Match test: CNT_CHILDREN_asC INFO Match test: NAME_FAMILY_STATUS_asD INFO Match test: DAYS_EMPLOYED_asC INFO Match test: FLAG_OWN_REALTY_asD INFO Match test: OCCUPATION_TYPE_asD INFO Match test: FLAG_OWN_CAR_asD INFO Match test: CODE_GENDER_asD INFO Match test: DAYS_BIRTH_asC INFO Match test: CNT_FAM_MEMBERS_asC INFO Match test: NAME_INCOME_TYPE_asD INFO Match test: Age_asC INFO Match test: NAME_EDUCATION_TYPE_asD INFO Match test: AMT_INCOME_TOTAL_asC INFO Creating test data... INFO Saving test data... INFO Creating test corr detail... INFO Creating Random1 corr... INFO Creating Random2 corr... INFO Creating Random3 corr... INFO Creating all corr... INFO Creating test cross table... INFO Creating test all desc tables... INFO Match train test: OCCUPATION_TYPE_asD INFO Match train test: FLAG_OWN_REALTY_asD INFO Match train test: DAYS_EMPLOYED_asC INFO Match train test: NAME_FAMILY_STATUS_asD INFO Match train test: NAME_INCOME_TYPE_asD INFO Match train test: ID_asC INFO Match train test: CODE_GENDER_asD INFO Match train test: NAME_EDUCATION_TYPE_asD INFO Match train test: AMT_INCOME_TOTAL_asC INFO Match train test: FLAG_WORK_PHONE_asC INFO Match train test: FLAG_OWN_CAR_asD INFO Match train test: FLAG_PHONE_asC INFO Match train test: DAYS_BIRTH_asC INFO Match train test: CNT_CHILDREN_asC INFO Match train test: Age_asC INFO Match train test: CNT_FAM_MEMBERS_asC INFO Match train test: birth_year_asC INFO Saving info... INFO Saving desc... INFO Saving cross... INFO Saving summary... INFO Saving corr summary... INFO Saving stable_summary... INFO Saving train_stable_iv_ks... INFO Saving train_stable_psi... INFO Saving train_stable_quantile... INFO Saving test_stable_iv_ks... INFO Saving test_stable_psi... INFO Saving test_stable_quantile... INFO Saving stable_cross_psi... INFO Saving detail... INFO Saving draft... INFO Saving drop... INFO Saving json... INFO Saving setting... INFO Adding runtime to excel... INFO Saving directory... INFO Saving... 
INFO Total: 7.58 s INFO Total: 7.60 s
# Set the "gbid_name" labels back to ('good', 'bad', 'ind', 'default').
at.dt.__excel__["gbid_name"] = ("good", "bad", "ind", "default")
# "vz" report: birth_year is the split column and two categorical columns
# are passed as cross_response.
vz_cross_columns = ["NAME_FAMILY_STATUS", "NAME_INCOME_TYPE"]
at.Analysis.data_flow(
    df_train,
    "./数据分析demo/vz/vz.xlsx",
    test_data=df_test,
    response="target",
    split_col_name="birth_year",
    cross_response=vz_cross_columns,
)
INFO Prepare kwargs... INFO Checking train_data... INFO Checking response... INFO Checking test_data... INFO Checking response... INFO Checking columns... INFO Checking dtypes... INFO Prepare run... INFO Run: FLAG_MOBIL INFO Run: FLAG_PHONE INFO Run: ID INFO Run: FLAG_WORK_PHONE INFO Run: weight INFO Run: NAME_HOUSING_TYPE INFO Run: CNT_CHILDREN INFO Run: NAME_FAMILY_STATUS INFO Run: FLAG_OWN_REALTY INFO Run: DAYS_EMPLOYED INFO Run: OCCUPATION_TYPE INFO Run: FLAG_OWN_CAR INFO Run: CODE_GENDER INFO Run: DAYS_BIRTH INFO Run: CNT_FAM_MEMBERS INFO Run: NAME_INCOME_TYPE INFO Run: FLAG_EMAIL INFO Run: Age INFO Run: NAME_EDUCATION_TYPE INFO Run: AMT_INCOME_TOTAL INFO Selecting... INFO Match train: FLAG_PHONE_asC INFO Match train: FLAG_WORK_PHONE_asC INFO Match train: ID_asC INFO Match train: CNT_CHILDREN_asC INFO Match train: NAME_FAMILY_STATUS_asD INFO Match train: FLAG_OWN_REALTY_asD INFO Match train: DAYS_EMPLOYED_asC INFO Match train: OCCUPATION_TYPE_asD INFO Match train: FLAG_OWN_CAR_asD INFO Match train: CODE_GENDER_asD INFO Match train: CNT_FAM_MEMBERS_asC INFO Match train: DAYS_BIRTH_asC INFO Match train: NAME_INCOME_TYPE_asD INFO Match train: Age_asC INFO Match train: NAME_EDUCATION_TYPE_asD INFO Match train: AMT_INCOME_TOTAL_asC INFO Creating train data... INFO Saving train data... INFO Creating train corr detail... INFO Creating (1952-12-31, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, 2001-01-02] corr... INFO Creating all corr... INFO Creating train cross table... INFO Creating train all desc tables... 
INFO Match test: FLAG_PHONE_asC INFO Match test: ID_asC INFO Match test: FLAG_WORK_PHONE_asC INFO Match test: CNT_CHILDREN_asC INFO Match test: NAME_FAMILY_STATUS_asD INFO Match test: FLAG_OWN_REALTY_asD INFO Match test: DAYS_EMPLOYED_asC INFO Match test: OCCUPATION_TYPE_asD INFO Match test: FLAG_OWN_CAR_asD INFO Match test: CODE_GENDER_asD INFO Match test: DAYS_BIRTH_asC INFO Match test: CNT_FAM_MEMBERS_asC INFO Match test: NAME_INCOME_TYPE_asD INFO Match test: Age_asC INFO Match test: NAME_EDUCATION_TYPE_asD INFO Match test: AMT_INCOME_TOTAL_asC INFO Creating test data... INFO Saving test data... INFO Creating test corr detail... INFO Creating (1952-12-31, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, 2000-01-02] corr... INFO Creating all corr... INFO Creating test cross table... INFO Creating test all desc tables... INFO Match train test: OCCUPATION_TYPE_asD INFO Match train test: FLAG_OWN_REALTY_asD INFO Match train test: DAYS_EMPLOYED_asC INFO Match train test: NAME_FAMILY_STATUS_asD INFO Match train test: NAME_INCOME_TYPE_asD INFO Match train test: ID_asC INFO Match train test: CODE_GENDER_asD INFO Match train test: NAME_EDUCATION_TYPE_asD INFO Match train test: AMT_INCOME_TOTAL_asC INFO Match train test: FLAG_WORK_PHONE_asC INFO Match train test: FLAG_OWN_CAR_asD INFO Match train test: FLAG_PHONE_asC INFO Match train test: DAYS_BIRTH_asC INFO Match train test: Age_asC INFO Match train test: CNT_CHILDREN_asC INFO Match train test: CNT_FAM_MEMBERS_asC INFO Saving info... INFO Saving desc... INFO Saving cross... INFO Saving unique... INFO Saving summary... INFO Saving corr summary... INFO Saving stable_summary... INFO Saving train_stable_iv_ks... INFO Saving train_stable_psi... INFO Saving train_stable_quantile... INFO Saving test_stable_iv_ks... INFO Saving test_stable_psi... INFO Saving test_stable_quantile... INFO Saving stable_cross_psi... INFO Saving detail... INFO Saving draft... INFO Saving drop... 
INFO Saving json... INFO Saving setting... INFO Adding runtime to excel... INFO Saving directory... INFO Saving... INFO Total: 7.64 s INFO Total: 7.66 s
# "v2" report: same train/test frames, with birth_year as the split column.
v2_options = dict(
    test_data=df_test,
    response="target",
    split_col_name="birth_year",
)
at.Analysis.data_flow(df_train, "./数据分析demo/v2/v2.xlsx", **v2_options)
INFO Prepare kwargs... INFO Checking train_data... INFO Checking response... INFO Checking test_data... INFO Checking response... INFO Checking columns... INFO Checking dtypes... INFO Prepare run... INFO Run: FLAG_MOBIL INFO Run: FLAG_PHONE INFO Run: ID INFO Run: FLAG_WORK_PHONE INFO Run: weight INFO Run: NAME_HOUSING_TYPE INFO Run: CNT_CHILDREN INFO Run: NAME_FAMILY_STATUS INFO Run: FLAG_OWN_REALTY INFO Run: DAYS_EMPLOYED INFO Run: OCCUPATION_TYPE INFO Run: FLAG_OWN_CAR INFO Run: CODE_GENDER INFO Run: DAYS_BIRTH INFO Run: CNT_FAM_MEMBERS INFO Run: NAME_INCOME_TYPE INFO Run: FLAG_EMAIL INFO Run: Age INFO Run: NAME_EDUCATION_TYPE INFO Run: AMT_INCOME_TOTAL INFO Selecting... INFO Match train: FLAG_PHONE_asC INFO Match train: FLAG_WORK_PHONE_asC INFO Match train: ID_asC INFO Match train: CNT_CHILDREN_asC INFO Match train: NAME_FAMILY_STATUS_asD INFO Match train: FLAG_OWN_REALTY_asD INFO Match train: DAYS_EMPLOYED_asC INFO Match train: OCCUPATION_TYPE_asD INFO Match train: FLAG_OWN_CAR_asD INFO Match train: CODE_GENDER_asD INFO Match train: CNT_FAM_MEMBERS_asC INFO Match train: DAYS_BIRTH_asC INFO Match train: NAME_INCOME_TYPE_asD INFO Match train: Age_asC INFO Match train: NAME_EDUCATION_TYPE_asD INFO Match train: AMT_INCOME_TOTAL_asC INFO Creating train data... INFO Saving train data... INFO Creating train corr detail... INFO Creating (1952-12-31, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, 2001-01-02] corr... INFO Creating all corr... INFO Creating train cross table... INFO Creating train all desc tables... 
INFO Match test: ID_asC INFO Match test: FLAG_PHONE_asC INFO Match test: FLAG_WORK_PHONE_asC INFO Match test: CNT_CHILDREN_asC INFO Match test: NAME_FAMILY_STATUS_asD INFO Match test: FLAG_OWN_REALTY_asD INFO Match test: DAYS_EMPLOYED_asC INFO Match test: OCCUPATION_TYPE_asD INFO Match test: FLAG_OWN_CAR_asD INFO Match test: CODE_GENDER_asD INFO Match test: DAYS_BIRTH_asC INFO Match test: CNT_FAM_MEMBERS_asC INFO Match test: NAME_INCOME_TYPE_asD INFO Match test: Age_asC INFO Match test: NAME_EDUCATION_TYPE_asD INFO Match test: AMT_INCOME_TOTAL_asC INFO Creating test data... INFO Saving test data... INFO Creating test corr detail... INFO Creating (1952-12-31, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, 2000-01-02] corr... INFO Creating all corr... INFO Creating test cross table... INFO Creating test all desc tables... INFO Match train test: OCCUPATION_TYPE_asD INFO Match train test: FLAG_OWN_REALTY_asD INFO Match train test: DAYS_EMPLOYED_asC INFO Match train test: NAME_FAMILY_STATUS_asD INFO Match train test: NAME_INCOME_TYPE_asD INFO Match train test: ID_asC INFO Match train test: CODE_GENDER_asD INFO Match train test: NAME_EDUCATION_TYPE_asD INFO Match train test: AMT_INCOME_TOTAL_asC INFO Match train test: FLAG_WORK_PHONE_asC INFO Match train test: FLAG_OWN_CAR_asD INFO Match train test: FLAG_PHONE_asC INFO Match train test: DAYS_BIRTH_asC INFO Match train test: CNT_CHILDREN_asC INFO Match train test: Age_asC INFO Match train test: CNT_FAM_MEMBERS_asC INFO Saving info... INFO Saving desc... INFO Saving cross... INFO Saving unique... INFO Saving summary... INFO Saving corr summary... INFO Saving stable_summary... INFO Saving train_stable_iv_ks... INFO Saving train_stable_psi... INFO Saving train_stable_quantile... INFO Saving test_stable_iv_ks... INFO Saving test_stable_psi... INFO Saving test_stable_quantile... INFO Saving stable_cross_psi... INFO Saving detail... INFO Saving draft... INFO Saving drop... 
INFO Saving json... INFO Saving setting... INFO Adding runtime to excel... INFO Saving directory... INFO Saving... INFO Total: 7.49 s INFO Total: 7.51 s
# "v2-weight" report: as "v2", additionally naming the "weight" column as
# the sample-weight column.
v2_weight_path = "./数据分析demo/v2-weight/v2-weight.xlsx"
at.Analysis.data_flow(
    df_train,
    v2_weight_path,
    test_data=df_test,
    response="target",
    split_col_name="birth_year",
    sample_weight_name="weight",
)
INFO Prepare kwargs... INFO Checking train_data... INFO Checking response... INFO Checking test_data... INFO Checking response... INFO Checking columns... INFO Checking dtypes... INFO Prepare run... INFO Run: FLAG_MOBIL INFO Run: FLAG_PHONE INFO Run: ID INFO Run: FLAG_WORK_PHONE INFO Run: NAME_HOUSING_TYPE INFO Run: CNT_CHILDREN INFO Run: NAME_FAMILY_STATUS INFO Run: FLAG_OWN_REALTY INFO Run: DAYS_EMPLOYED INFO Run: OCCUPATION_TYPE INFO Run: FLAG_OWN_CAR INFO Run: CODE_GENDER INFO Run: DAYS_BIRTH INFO Run: CNT_FAM_MEMBERS INFO Run: NAME_INCOME_TYPE INFO Run: FLAG_EMAIL INFO Run: Age INFO Run: NAME_EDUCATION_TYPE INFO Run: AMT_INCOME_TOTAL INFO Selecting... INFO Match train: FLAG_PHONE_asC INFO Match train: FLAG_WORK_PHONE_asC INFO Match train: CNT_CHILDREN_asC INFO Match train: ID_asC INFO Match train: NAME_FAMILY_STATUS_asD INFO Match train: FLAG_OWN_REALTY_asD INFO Match train: DAYS_EMPLOYED_asC INFO Match train: FLAG_OWN_CAR_asD INFO Match train: OCCUPATION_TYPE_asD INFO Match train: CODE_GENDER_asD INFO Match train: DAYS_BIRTH_asC INFO Match train: CNT_FAM_MEMBERS_asC INFO Match train: NAME_INCOME_TYPE_asD INFO Match train: Age_asC INFO Match train: NAME_EDUCATION_TYPE_asD INFO Match train: AMT_INCOME_TOTAL_asC INFO Creating train data... INFO Saving train data... INFO Creating train corr detail... INFO Creating (1952-12-31, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, 2001-01-02] corr... INFO Creating all corr... INFO Creating train cross table... INFO Creating train all desc tables... 
INFO Match test: ID_asC INFO Match test: FLAG_PHONE_asC INFO Match test: FLAG_WORK_PHONE_asC INFO Match test: CNT_CHILDREN_asC INFO Match test: NAME_FAMILY_STATUS_asD INFO Match test: FLAG_OWN_REALTY_asD INFO Match test: DAYS_EMPLOYED_asC INFO Match test: OCCUPATION_TYPE_asD INFO Match test: FLAG_OWN_CAR_asD INFO Match test: CODE_GENDER_asD INFO Match test: DAYS_BIRTH_asC INFO Match test: CNT_FAM_MEMBERS_asC INFO Match test: NAME_INCOME_TYPE_asD INFO Match test: Age_asC INFO Match test: NAME_EDUCATION_TYPE_asD INFO Match test: AMT_INCOME_TOTAL_asC INFO Creating test data... INFO Saving test data... INFO Creating test corr detail... INFO Creating (1952-12-31, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, 2000-01-02] corr... INFO Creating all corr... INFO Creating test cross table... INFO Creating test all desc tables... INFO Match train test: OCCUPATION_TYPE_asD INFO Match train test: FLAG_OWN_REALTY_asD INFO Match train test: DAYS_EMPLOYED_asC INFO Match train test: NAME_FAMILY_STATUS_asD INFO Match train test: ID_asC INFO Match train test: NAME_INCOME_TYPE_asD INFO Match train test: NAME_EDUCATION_TYPE_asD INFO Match train test: CODE_GENDER_asD INFO Match train test: AMT_INCOME_TOTAL_asC INFO Match train test: FLAG_WORK_PHONE_asC INFO Match train test: FLAG_OWN_CAR_asD INFO Match train test: FLAG_PHONE_asC INFO Match train test: CNT_CHILDREN_asC INFO Match train test: DAYS_BIRTH_asC INFO Match train test: CNT_FAM_MEMBERS_asC INFO Match train test: Age_asC INFO Saving info... INFO Saving desc... INFO Saving cross... INFO Saving unique... INFO Saving summary... INFO Saving corr summary... INFO Saving stable_summary... INFO Saving train_stable_iv_ks... INFO Saving train_stable_psi... INFO Saving train_stable_quantile... INFO Saving test_stable_iv_ks... INFO Saving test_stable_psi... INFO Saving test_stable_quantile... INFO Saving stable_cross_psi... INFO Saving detail... INFO Saving draft... INFO Saving drop... 
INFO Saving json... INFO Saving setting... INFO Adding runtime to excel... INFO Saving directory... INFO Saving... INFO Total: 7.32 s INFO Total: 7.34 s
# Redefine the sample weight: 1.5 for target == 1, otherwise 1.
df_["weight"] = [1.5 if label == 1 else 1 for label in df_["target"]]
# Rebuild the labelled subset and the split so they carry the new weights
# (same test_size, stratification and random_state as before).
has_target = df_["target"].notna()
df_data = df_.loc[has_target]
df_train, df_test = train_test_split(
    df_data,
    test_size=0.3,
    stratify=df_data["target"],
    random_state=42,
)
# Compute the "v2-weight2" report with the updated weights.
v2_weight2_path = "./数据分析demo/v2-weight2/v2-weight2.xlsx"
at.Analysis.data_flow(
    df_train,
    v2_weight2_path,
    test_data=df_test,
    response="target",
    split_col_name="birth_year",
    sample_weight_name="weight",
)
INFO Prepare kwargs... INFO Checking train_data... INFO Checking response... INFO Checking test_data... INFO Checking response... INFO Checking columns... INFO Checking dtypes... INFO Prepare run... INFO Run: FLAG_MOBIL INFO Run: FLAG_PHONE INFO Run: ID INFO Run: FLAG_WORK_PHONE INFO Run: NAME_HOUSING_TYPE INFO Run: CNT_CHILDREN INFO Run: NAME_FAMILY_STATUS INFO Run: FLAG_OWN_REALTY INFO Run: DAYS_EMPLOYED INFO Run: OCCUPATION_TYPE INFO Run: FLAG_OWN_CAR INFO Run: CODE_GENDER INFO Run: DAYS_BIRTH INFO Run: CNT_FAM_MEMBERS INFO Run: NAME_INCOME_TYPE INFO Run: FLAG_EMAIL INFO Run: Age INFO Run: NAME_EDUCATION_TYPE INFO Run: AMT_INCOME_TOTAL INFO Selecting... INFO Match train: FLAG_PHONE_asC INFO Match train: FLAG_WORK_PHONE_asC INFO Match train: ID_asC INFO Match train: CNT_CHILDREN_asC INFO Match train: NAME_FAMILY_STATUS_asD INFO Match train: DAYS_EMPLOYED_asC INFO Match train: FLAG_OWN_REALTY_asD INFO Match train: FLAG_OWN_CAR_asD INFO Match train: OCCUPATION_TYPE_asD INFO Match train: CODE_GENDER_asD INFO Match train: CNT_FAM_MEMBERS_asC INFO Match train: DAYS_BIRTH_asC INFO Match train: NAME_INCOME_TYPE_asD INFO Match train: Age_asC INFO Match train: NAME_EDUCATION_TYPE_asD INFO Match train: AMT_INCOME_TOTAL_asC INFO Creating train data... INFO Saving train data... INFO Creating train corr detail... INFO Creating (1952-12-31, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, 2001-01-02] corr... INFO Creating all corr... INFO Creating train cross table... INFO Creating train all desc tables... 
INFO Match test: FLAG_PHONE_asC INFO Match test: ID_asC INFO Match test: FLAG_WORK_PHONE_asC INFO Match test: CNT_CHILDREN_asC INFO Match test: NAME_FAMILY_STATUS_asD INFO Match test: FLAG_OWN_REALTY_asD INFO Match test: DAYS_EMPLOYED_asC INFO Match test: OCCUPATION_TYPE_asD INFO Match test: FLAG_OWN_CAR_asD INFO Match test: CODE_GENDER_asD INFO Match test: DAYS_BIRTH_asC INFO Match test: CNT_FAM_MEMBERS_asC INFO Match test: NAME_INCOME_TYPE_asD INFO Match test: Age_asC INFO Match test: NAME_EDUCATION_TYPE_asD INFO Match test: AMT_INCOME_TOTAL_asC INFO Creating test data... INFO Saving test data... INFO Creating test corr detail... INFO Creating (1952-12-31, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, 2000-01-02] corr... INFO Creating all corr... INFO Creating test cross table... INFO Creating test all desc tables... INFO Match train test: OCCUPATION_TYPE_asD INFO Match train test: FLAG_OWN_REALTY_asD INFO Match train test: DAYS_EMPLOYED_asC INFO Match train test: NAME_FAMILY_STATUS_asD INFO Match train test: NAME_INCOME_TYPE_asD INFO Match train test: ID_asC INFO Match train test: CODE_GENDER_asD INFO Match train test: NAME_EDUCATION_TYPE_asD INFO Match train test: AMT_INCOME_TOTAL_asC INFO Match train test: FLAG_WORK_PHONE_asC INFO Match train test: FLAG_OWN_CAR_asD INFO Match train test: FLAG_PHONE_asC INFO Match train test: DAYS_BIRTH_asC INFO Match train test: CNT_CHILDREN_asC INFO Match train test: Age_asC INFO Match train test: CNT_FAM_MEMBERS_asC INFO Saving info... INFO Saving desc... INFO Saving cross... INFO Saving unique... INFO Saving summary... INFO Saving corr summary... INFO Saving stable_summary... INFO Saving train_stable_iv_ks... INFO Saving train_stable_psi... INFO Saving train_stable_quantile... INFO Saving test_stable_iv_ks... INFO Saving test_stable_psi... INFO Saving test_stable_quantile... INFO Saving stable_cross_psi... INFO Saving detail... INFO Saving draft... INFO Saving drop... 
INFO Saving json... INFO Saving setting... INFO Adding runtime to excel... INFO Saving directory... INFO Saving... INFO Total: 7.20 s INFO Total: 7.23 s
# "v2-rename" report: as "v2", plus a variable dictionary workbook.
# "ori_name" and "new_name" are passed as the series-name and explain-name
# columns of that workbook.
rename_options = dict(
    var_dict_path="./字段翻译.xlsx",
    var_series_name="ori_name",
    var_explain_name="new_name",
)
at.Analysis.data_flow(
    df_train,
    "./数据分析demo/v2-rename/v2-rename.xlsx",
    test_data=df_test,
    response="target",
    split_col_name="birth_year",
    **rename_options,
)
INFO Prepare kwargs... INFO Checking train_data... INFO Checking response... INFO Checking test_data... INFO Checking response... INFO Checking columns... INFO Checking dtypes... INFO Loading ./字段翻译.xlsx... INFO Prepare run... INFO Run: FLAG_MOBIL INFO Run: FLAG_PHONE INFO Run: ID INFO Run: FLAG_WORK_PHONE INFO Run: weight INFO Run: NAME_HOUSING_TYPE INFO Run: CNT_CHILDREN INFO Run: NAME_FAMILY_STATUS INFO Run: DAYS_EMPLOYED INFO Run: FLAG_OWN_REALTY INFO Run: OCCUPATION_TYPE INFO Run: FLAG_OWN_CAR INFO Run: CODE_GENDER INFO Run: DAYS_BIRTH INFO Run: CNT_FAM_MEMBERS INFO Run: NAME_INCOME_TYPE INFO Run: FLAG_EMAIL INFO Run: Age INFO Run: NAME_EDUCATION_TYPE INFO Run: AMT_INCOME_TOTAL INFO Selecting... INFO Match train: FLAG_PHONE_asC INFO Match train: FLAG_WORK_PHONE_asC INFO Match train: ID_asC INFO Match train: NAME_FAMILY_STATUS_asD INFO Match train: CNT_CHILDREN_asC INFO Match train: FLAG_OWN_REALTY_asD INFO Match train: DAYS_EMPLOYED_asC INFO Match train: OCCUPATION_TYPE_asD INFO Match train: FLAG_OWN_CAR_asD INFO Match train: CODE_GENDER_asD INFO Match train: CNT_FAM_MEMBERS_asC INFO Match train: DAYS_BIRTH_asC INFO Match train: Age_asC INFO Match train: NAME_INCOME_TYPE_asD INFO Match train: NAME_EDUCATION_TYPE_asD INFO Match train: AMT_INCOME_TOTAL_asC INFO Creating train data... INFO Saving train data... INFO Creating train corr detail... INFO Creating (1952-12-31, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, 2001-01-02] corr... INFO Creating all corr... INFO Creating train cross table... INFO Creating train all desc tables... 
INFO Match test: FLAG_PHONE_asC INFO Match test: ID_asC INFO Match test: FLAG_WORK_PHONE_asC INFO Match test: CNT_CHILDREN_asC INFO Match test: NAME_FAMILY_STATUS_asD INFO Match test: FLAG_OWN_REALTY_asD INFO Match test: DAYS_EMPLOYED_asC INFO Match test: OCCUPATION_TYPE_asD INFO Match test: FLAG_OWN_CAR_asD INFO Match test: CODE_GENDER_asD INFO Match test: DAYS_BIRTH_asC INFO Match test: CNT_FAM_MEMBERS_asC INFO Match test: NAME_INCOME_TYPE_asD INFO Match test: Age_asC INFO Match test: NAME_EDUCATION_TYPE_asD INFO Match test: AMT_INCOME_TOTAL_asC INFO Creating test data... INFO Saving test data... INFO Creating test corr detail... INFO Creating (1952-12-31, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, 2000-01-02] corr... INFO Creating all corr... INFO Creating test cross table... INFO Creating test all desc tables... INFO Match train test: OCCUPATION_TYPE_asD INFO Match train test: FLAG_OWN_REALTY_asD INFO Match train test: DAYS_EMPLOYED_asC INFO Match train test: NAME_FAMILY_STATUS_asD INFO Match train test: NAME_INCOME_TYPE_asD INFO Match train test: ID_asC INFO Match train test: CODE_GENDER_asD INFO Match train test: NAME_EDUCATION_TYPE_asD INFO Match train test: AMT_INCOME_TOTAL_asC INFO Match train test: FLAG_WORK_PHONE_asC INFO Match train test: FLAG_OWN_CAR_asD INFO Match train test: FLAG_PHONE_asC INFO Match train test: DAYS_BIRTH_asC INFO Match train test: CNT_CHILDREN_asC INFO Match train test: Age_asC INFO Match train test: CNT_FAM_MEMBERS_asC INFO Saving info... INFO Saving desc... INFO Saving cross... INFO Saving unique... INFO Saving summary... INFO Saving var_name... INFO Saving corr summary... INFO Saving stable_summary... INFO Saving train_stable_iv_ks... INFO Saving train_stable_psi... INFO Saving train_stable_quantile... INFO Saving test_stable_iv_ks... INFO Saving test_stable_psi... INFO Saving test_stable_quantile... INFO Saving stable_cross_psi... INFO Saving detail... INFO Saving draft... 
INFO Saving drop... INFO Saving json... INFO Saving setting... INFO Adding runtime to excel... INFO Saving directory... INFO Saving... INFO Total: 7.62 s INFO Total: 7.64 s
# Count missing vs. present OCCUPATION_TYPE within three equal-frequency
# Age bins of the training set; the table is transposed so Age bins are rows.
occupation_missing = df_train["OCCUPATION_TYPE"].isnull()
age_bins = pd.qcut(df_train["Age"], 3)
pd.crosstab(occupation_missing, age_bins).transpose()
| OCCUPATION_TYPE | False | True |
|---|---|---|
| Age | ||
| (19.999, 37.0] | 7641 | 1621 |
| (37.0, 49.0] | 6622 | 1416 |
| (49.0, 68.0] | 3351 | 4868 |
# v3: split the analysis on birth_year and request per-split descriptive
# tables. NOTE: the logged run creates "desc tables" for every birth_year
# interval and warns that save_desc_info is switched to True.
at.Analysis.data_flow(
    df_train,
    "./数据分析demo/v3/v3.xlsx",
    test_data=df_test,
    response="target",
    split_col_name="birth_year",
    split_desc_info=True,
)
INFO Prepare kwargs... INFO Checking train_data... INFO Checking response... INFO Checking test_data... INFO Checking response... INFO Checking columns... INFO Checking dtypes... WARNING Setting save_desc_info to True ! INFO Prepare run... INFO Run: FLAG_MOBIL INFO Run: FLAG_PHONE INFO Run: ID INFO Run: FLAG_WORK_PHONE INFO Run: weight INFO Run: NAME_HOUSING_TYPE INFO Run: CNT_CHILDREN INFO Run: NAME_FAMILY_STATUS INFO Run: DAYS_EMPLOYED INFO Run: FLAG_OWN_REALTY INFO Run: OCCUPATION_TYPE INFO Run: FLAG_OWN_CAR INFO Run: CODE_GENDER INFO Run: DAYS_BIRTH INFO Run: CNT_FAM_MEMBERS INFO Run: NAME_INCOME_TYPE INFO Run: FLAG_EMAIL INFO Run: Age INFO Run: NAME_EDUCATION_TYPE INFO Run: AMT_INCOME_TOTAL INFO Selecting... INFO Match train: FLAG_PHONE_asC INFO Match train: FLAG_WORK_PHONE_asC INFO Match train: ID_asC INFO Match train: NAME_FAMILY_STATUS_asD INFO Match train: CNT_CHILDREN_asC INFO Match train: FLAG_OWN_REALTY_asD INFO Match train: DAYS_EMPLOYED_asC INFO Match train: OCCUPATION_TYPE_asD INFO Match train: FLAG_OWN_CAR_asD INFO Match train: CODE_GENDER_asD INFO Match train: CNT_FAM_MEMBERS_asC INFO Match train: DAYS_BIRTH_asC INFO Match train: NAME_INCOME_TYPE_asD INFO Match train: Age_asC INFO Match train: NAME_EDUCATION_TYPE_asD INFO Match train: AMT_INCOME_TOTAL_asC INFO Creating train data... INFO Saving train data... INFO Creating train corr detail... INFO Creating (1952-12-31, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, 2001-01-02] corr... INFO Creating all corr... INFO Creating train cross table... INFO Creating train all desc tables... INFO Creating train (1952-12-31, 1972-01-01] desc tables... INFO Creating train (1972-01-01, 1984-01-01] desc tables... INFO Creating train (1984-01-01, 2001-01-02] desc tables... 
INFO Match test: ID_asC INFO Match test: FLAG_PHONE_asC INFO Match test: FLAG_WORK_PHONE_asC INFO Match test: CNT_CHILDREN_asC INFO Match test: NAME_FAMILY_STATUS_asD INFO Match test: FLAG_OWN_REALTY_asD INFO Match test: DAYS_EMPLOYED_asC INFO Match test: OCCUPATION_TYPE_asD INFO Match test: FLAG_OWN_CAR_asD INFO Match test: CODE_GENDER_asD INFO Match test: DAYS_BIRTH_asC INFO Match test: CNT_FAM_MEMBERS_asC INFO Match test: NAME_INCOME_TYPE_asD INFO Match test: Age_asC INFO Match test: NAME_EDUCATION_TYPE_asD INFO Match test: AMT_INCOME_TOTAL_asC INFO Creating test data... INFO Saving test data... INFO Creating test corr detail... INFO Creating (1952-12-31, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, 2000-01-02] corr... INFO Creating all corr... INFO Creating test cross table... INFO Creating test all desc tables... INFO Creating test (1952-12-31, 1972-01-01] desc tables... INFO Creating test (1972-01-01, 1984-01-01] desc tables... INFO Creating test (1984-01-01, 2000-01-02] desc tables... INFO Match train test: OCCUPATION_TYPE_asD INFO Match train test: FLAG_OWN_REALTY_asD INFO Match train test: DAYS_EMPLOYED_asC INFO Match train test: NAME_FAMILY_STATUS_asD INFO Match train test: NAME_INCOME_TYPE_asD INFO Match train test: ID_asC INFO Match train test: CODE_GENDER_asD INFO Match train test: NAME_EDUCATION_TYPE_asD INFO Match train test: AMT_INCOME_TOTAL_asC INFO Match train test: FLAG_WORK_PHONE_asC INFO Match train test: FLAG_OWN_CAR_asD INFO Match train test: FLAG_PHONE_asC INFO Match train test: DAYS_BIRTH_asC INFO Match train test: CNT_CHILDREN_asC INFO Match train test: Age_asC INFO Match train test: CNT_FAM_MEMBERS_asC INFO Saving info... INFO Saving desc... INFO Saving cross... INFO Saving unique... INFO Saving summary... INFO Saving corr summary... INFO Saving stable_summary... INFO Saving train_stable_iv_ks... INFO Saving train_stable_psi... INFO Saving train_stable_quantile... 
INFO Saving test_stable_iv_ks... INFO Saving test_stable_psi... INFO Saving test_stable_quantile... INFO Saving stable_cross_psi... INFO Saving detail... INFO Saving draft... INFO Saving drop... INFO Saving json... INFO Saving setting... INFO Adding runtime to excel... INFO Saving directory... INFO Saving... INFO Total: 7.83 s INFO Total: 7.86 s
# v4: split on birth_year with use_train_time=True.
# NOTE: in the logged run train and test then report the same open-ended
# intervals ((-inf, 1972-01-01] ... (1984-01-01, inf)) -- presumably the
# test set reuses the train-derived edges; confirm against alpha_tools.
at.Analysis.data_flow(
    df_train,
    "./数据分析demo/v4/v4.xlsx",
    test_data=df_test,
    response="target",
    split_col_name="birth_year",
    use_train_time=True,
)
INFO Prepare kwargs... INFO Checking train_data... INFO Checking response... INFO Checking test_data... INFO Checking response... INFO Checking columns... INFO Checking dtypes... WARNING Setting format_time_edge to True ! INFO Prepare run... INFO Run: FLAG_MOBIL INFO Run: FLAG_PHONE INFO Run: ID INFO Run: FLAG_WORK_PHONE INFO Run: weight INFO Run: NAME_HOUSING_TYPE INFO Run: CNT_CHILDREN INFO Run: NAME_FAMILY_STATUS INFO Run: FLAG_OWN_REALTY INFO Run: DAYS_EMPLOYED INFO Run: OCCUPATION_TYPE INFO Run: FLAG_OWN_CAR INFO Run: CODE_GENDER INFO Run: DAYS_BIRTH INFO Run: CNT_FAM_MEMBERS INFO Run: NAME_INCOME_TYPE INFO Run: FLAG_EMAIL INFO Run: Age INFO Run: NAME_EDUCATION_TYPE INFO Run: AMT_INCOME_TOTAL INFO Selecting... INFO Match train: FLAG_PHONE_asC INFO Match train: ID_asC INFO Match train: FLAG_WORK_PHONE_asC INFO Match train: CNT_CHILDREN_asC INFO Match train: NAME_FAMILY_STATUS_asD INFO Match train: FLAG_OWN_REALTY_asD INFO Match train: DAYS_EMPLOYED_asC INFO Match train: OCCUPATION_TYPE_asD INFO Match train: FLAG_OWN_CAR_asD INFO Match train: CODE_GENDER_asD INFO Match train: DAYS_BIRTH_asC INFO Match train: CNT_FAM_MEMBERS_asC INFO Match train: NAME_INCOME_TYPE_asD INFO Match train: Age_asC INFO Match train: NAME_EDUCATION_TYPE_asD INFO Match train: AMT_INCOME_TOTAL_asC INFO Creating train data... INFO Saving train data... INFO Creating train corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating train cross table... INFO Creating train all desc tables... 
INFO Match test: FLAG_PHONE_asC INFO Match test: ID_asC INFO Match test: FLAG_WORK_PHONE_asC INFO Match test: CNT_CHILDREN_asC INFO Match test: NAME_FAMILY_STATUS_asD INFO Match test: FLAG_OWN_REALTY_asD INFO Match test: DAYS_EMPLOYED_asC INFO Match test: OCCUPATION_TYPE_asD INFO Match test: FLAG_OWN_CAR_asD INFO Match test: CODE_GENDER_asD INFO Match test: DAYS_BIRTH_asC INFO Match test: CNT_FAM_MEMBERS_asC INFO Match test: NAME_INCOME_TYPE_asD INFO Match test: Age_asC INFO Match test: NAME_EDUCATION_TYPE_asD INFO Match test: AMT_INCOME_TOTAL_asC INFO Creating test data... INFO Saving test data... INFO Creating test corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating test cross table... INFO Creating test all desc tables... INFO Match train test: OCCUPATION_TYPE_asD INFO Match train test: FLAG_OWN_REALTY_asD INFO Match train test: DAYS_EMPLOYED_asC INFO Match train test: NAME_FAMILY_STATUS_asD INFO Match train test: NAME_INCOME_TYPE_asD INFO Match train test: ID_asC INFO Match train test: CODE_GENDER_asD INFO Match train test: NAME_EDUCATION_TYPE_asD INFO Match train test: AMT_INCOME_TOTAL_asC INFO Match train test: FLAG_WORK_PHONE_asC INFO Match train test: FLAG_OWN_CAR_asD INFO Match train test: FLAG_PHONE_asC INFO Match train test: DAYS_BIRTH_asC INFO Match train test: CNT_CHILDREN_asC INFO Match train test: Age_asC INFO Match train test: CNT_FAM_MEMBERS_asC INFO Saving info... INFO Saving desc... INFO Saving cross... INFO Saving unique... INFO Saving summary... INFO Saving corr summary... INFO Saving stable_summary... INFO Saving train_stable_iv_ks... INFO Saving train_stable_psi... INFO Saving train_stable_quantile... INFO Saving test_stable_iv_ks... INFO Saving test_stable_psi... INFO Saving test_stable_quantile... INFO Saving stable_cross_psi... INFO Saving detail... INFO Saving draft... INFO Saving drop... INFO Saving json... 
INFO Saving setting... INFO Adding runtime to excel... INFO Saving directory... INFO Saving... INFO Total: 7.58 s INFO Total: 7.61 s
# v5: additionally pass ID through add_info.
# NOTE: in the logged run ID no longer shows up among the analysed
# ("Run:") variables -- presumably it is carried along as an info column.
at.Analysis.data_flow(
    df_train,
    "./数据分析demo/v5/v5.xlsx",
    test_data=df_test,
    response="target",
    split_col_name="birth_year",
    use_train_time=True,
    add_info="ID",
)
INFO Prepare kwargs... INFO Checking train_data... INFO Checking response... INFO Checking test_data... INFO Checking response... INFO Checking columns... INFO Checking dtypes... WARNING Setting format_time_edge to True ! INFO Prepare run... INFO Run: FLAG_MOBIL INFO Run: FLAG_PHONE INFO Run: FLAG_WORK_PHONE INFO Run: weight INFO Run: NAME_HOUSING_TYPE INFO Run: CNT_CHILDREN INFO Run: NAME_FAMILY_STATUS INFO Run: FLAG_OWN_REALTY INFO Run: DAYS_EMPLOYED INFO Run: OCCUPATION_TYPE INFO Run: FLAG_OWN_CAR INFO Run: CODE_GENDER INFO Run: DAYS_BIRTH INFO Run: CNT_FAM_MEMBERS INFO Run: NAME_INCOME_TYPE INFO Run: FLAG_EMAIL INFO Run: Age INFO Run: NAME_EDUCATION_TYPE INFO Run: AMT_INCOME_TOTAL INFO Selecting... INFO Match train: FLAG_PHONE_asC INFO Match train: FLAG_WORK_PHONE_asC INFO Match train: CNT_CHILDREN_asC INFO Match train: NAME_FAMILY_STATUS_asD INFO Match train: DAYS_EMPLOYED_asC INFO Match train: FLAG_OWN_REALTY_asD INFO Match train: OCCUPATION_TYPE_asD INFO Match train: FLAG_OWN_CAR_asD INFO Match train: CODE_GENDER_asD INFO Match train: CNT_FAM_MEMBERS_asC INFO Match train: DAYS_BIRTH_asC INFO Match train: NAME_INCOME_TYPE_asD INFO Match train: Age_asC INFO Match train: NAME_EDUCATION_TYPE_asD INFO Match train: AMT_INCOME_TOTAL_asC INFO Creating train data... INFO Saving train data... INFO Creating train corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating train cross table... INFO Creating train all desc tables... 
INFO Match test: FLAG_PHONE_asC INFO Match test: FLAG_WORK_PHONE_asC INFO Match test: CNT_CHILDREN_asC INFO Match test: NAME_FAMILY_STATUS_asD INFO Match test: FLAG_OWN_REALTY_asD INFO Match test: DAYS_EMPLOYED_asC INFO Match test: OCCUPATION_TYPE_asD INFO Match test: FLAG_OWN_CAR_asD INFO Match test: CODE_GENDER_asD INFO Match test: DAYS_BIRTH_asC INFO Match test: CNT_FAM_MEMBERS_asC INFO Match test: NAME_INCOME_TYPE_asD INFO Match test: Age_asC INFO Match test: NAME_EDUCATION_TYPE_asD INFO Match test: AMT_INCOME_TOTAL_asC INFO Creating test data... INFO Saving test data... INFO Creating test corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating test cross table... INFO Creating test all desc tables... INFO Match train test: OCCUPATION_TYPE_asD INFO Match train test: FLAG_OWN_REALTY_asD INFO Match train test: DAYS_EMPLOYED_asC INFO Match train test: NAME_FAMILY_STATUS_asD INFO Match train test: NAME_INCOME_TYPE_asD INFO Match train test: CODE_GENDER_asD INFO Match train test: NAME_EDUCATION_TYPE_asD INFO Match train test: AMT_INCOME_TOTAL_asC INFO Match train test: FLAG_WORK_PHONE_asC INFO Match train test: FLAG_OWN_CAR_asD INFO Match train test: FLAG_PHONE_asC INFO Match train test: DAYS_BIRTH_asC INFO Match train test: CNT_CHILDREN_asC INFO Match train test: Age_asC INFO Match train test: CNT_FAM_MEMBERS_asC INFO Saving info... INFO Saving desc... INFO Saving cross... INFO Saving unique... INFO Saving summary... INFO Saving corr summary... INFO Saving stable_summary... INFO Saving train_stable_iv_ks... INFO Saving train_stable_psi... INFO Saving train_stable_quantile... INFO Saving test_stable_iv_ks... INFO Saving test_stable_psi... INFO Saving test_stable_quantile... INFO Saving stable_cross_psi... INFO Saving detail... INFO Saving draft... INFO Saving drop... INFO Saving json... INFO Saving setting... INFO Adding runtime to excel... 
INFO Saving directory... INFO Saving... INFO Total: 7.23 s INFO Total: 7.26 s
# Value distribution of FLAG_MOBIL in the training set.
df_train.loc[:, "FLAG_MOBIL"].value_counts()
1 25519 Name: FLAG_MOBIL, dtype: int64
# Value distribution of FLAG_MOBIL in the test set.
df_test.loc[:, "FLAG_MOBIL"].value_counts()
1 10938 Name: FLAG_MOBIL, dtype: int64
# v6: turn off the single-value filter.
at.Analysis.data_flow(
    df_train,
    "./数据分析demo/v6/v6.xlsx",
    test_data=df_test,
    response="target",  # basic definition
    split_col_name="birth_year",
    use_train_time=True,  # time slicing
    add_info="ID",  # add the primary key
    enable_single_threshold=False,  # turn off single-value filtering
)
INFO Prepare kwargs... INFO Checking train_data... INFO Checking response... INFO Checking test_data... INFO Checking response... INFO Checking columns... INFO Checking dtypes... WARNING Setting format_time_edge to True ! INFO Prepare run... INFO Run: FLAG_MOBIL INFO Run: FLAG_PHONE INFO Run: FLAG_WORK_PHONE INFO Run: weight INFO Run: NAME_HOUSING_TYPE INFO Run: CNT_CHILDREN INFO Run: NAME_FAMILY_STATUS INFO Run: FLAG_OWN_REALTY INFO Run: DAYS_EMPLOYED INFO Run: OCCUPATION_TYPE INFO Run: FLAG_OWN_CAR INFO Run: CODE_GENDER INFO Run: DAYS_BIRTH INFO Run: CNT_FAM_MEMBERS INFO Run: NAME_INCOME_TYPE INFO Run: FLAG_EMAIL INFO Run: Age INFO Run: NAME_EDUCATION_TYPE INFO Run: AMT_INCOME_TOTAL INFO Selecting... INFO Match train: FLAG_WORK_PHONE_asC INFO Match train: FLAG_PHONE_asC INFO Match train: CNT_CHILDREN_asC INFO Match train: NAME_FAMILY_STATUS_asD INFO Match train: FLAG_OWN_REALTY_asD INFO Match train: DAYS_EMPLOYED_asC INFO Match train: OCCUPATION_TYPE_asD INFO Match train: FLAG_OWN_CAR_asD INFO Match train: CNT_FAM_MEMBERS_asC INFO Match train: CODE_GENDER_asD INFO Match train: DAYS_BIRTH_asC INFO Match train: FLAG_EMAIL_asC INFO Match train: NAME_INCOME_TYPE_asD INFO Match train: NAME_EDUCATION_TYPE_asD INFO Match train: Age_asC INFO Match train: AMT_INCOME_TOTAL_asC INFO Creating train data... INFO Saving train data... INFO Creating train corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating train cross table... INFO Creating train all desc tables... 
INFO Match test: FLAG_PHONE_asC INFO Match test: FLAG_WORK_PHONE_asC INFO Match test: CNT_CHILDREN_asC INFO Match test: NAME_FAMILY_STATUS_asD INFO Match test: FLAG_OWN_REALTY_asD INFO Match test: DAYS_EMPLOYED_asC INFO Match test: OCCUPATION_TYPE_asD INFO Match test: FLAG_OWN_CAR_asD INFO Match test: CODE_GENDER_asD INFO Match test: DAYS_BIRTH_asC INFO Match test: CNT_FAM_MEMBERS_asC INFO Match test: NAME_INCOME_TYPE_asD INFO Match test: FLAG_EMAIL_asC INFO Match test: Age_asC INFO Match test: NAME_EDUCATION_TYPE_asD INFO Match test: AMT_INCOME_TOTAL_asC INFO Creating test data... INFO Saving test data... INFO Creating test corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating test cross table... INFO Creating test all desc tables... INFO Match train test: OCCUPATION_TYPE_asD INFO Match train test: FLAG_OWN_REALTY_asD INFO Match train test: DAYS_EMPLOYED_asC INFO Match train test: NAME_FAMILY_STATUS_asD INFO Match train test: NAME_INCOME_TYPE_asD INFO Match train test: CODE_GENDER_asD INFO Match train test: NAME_EDUCATION_TYPE_asD INFO Match train test: AMT_INCOME_TOTAL_asC INFO Match train test: FLAG_WORK_PHONE_asC INFO Match train test: FLAG_OWN_CAR_asD INFO Match train test: FLAG_PHONE_asC INFO Match train test: DAYS_BIRTH_asC INFO Match train test: CNT_CHILDREN_asC INFO Match train test: Age_asC INFO Match train test: FLAG_EMAIL_asC INFO Match train test: CNT_FAM_MEMBERS_asC INFO Saving info... INFO Saving desc... INFO Saving cross... INFO Saving unique... INFO Saving summary... INFO Saving corr summary... INFO Saving stable_summary... INFO Saving train_stable_iv_ks... INFO Saving train_stable_psi... INFO Saving train_stable_quantile... INFO Saving test_stable_iv_ks... INFO Saving test_stable_psi... INFO Saving test_stable_quantile... INFO Saving stable_cross_psi... INFO Saving detail... INFO Saving draft... INFO Saving drop... 
INFO Saving json... INFO Saving setting... INFO Adding runtime to excel... INFO Saving directory... INFO Saving... INFO Total: 7.52 s INFO Total: 7.55 s
# v7: additionally enable IV filtering with the threshold range [0.01, inf].
at.Analysis.data_flow(
    df_train,
    "./数据分析demo/v7/v7.xlsx",
    test_data=df_test,
    response="target",  # basic definition
    split_col_name="birth_year",
    use_train_time=True,  # time slicing
    add_info="ID",  # add the primary key
    enable_single_threshold=False,  # turn off single-value filtering
    enable_iv_limit=True,
    iv_threshold=[0.01, np.inf],  # enable IV filtering
)
INFO Prepare kwargs... INFO Checking train_data... INFO Checking response... INFO Checking test_data... INFO Checking response... INFO Checking columns... INFO Checking dtypes... WARNING Setting format_time_edge to True ! INFO Prepare run... INFO Run: FLAG_MOBIL INFO Run: FLAG_PHONE INFO Run: FLAG_WORK_PHONE INFO Run: weight INFO Run: NAME_HOUSING_TYPE INFO Run: CNT_CHILDREN INFO Run: NAME_FAMILY_STATUS INFO Run: FLAG_OWN_REALTY INFO Run: DAYS_EMPLOYED INFO Run: OCCUPATION_TYPE INFO Run: FLAG_OWN_CAR INFO Run: CODE_GENDER INFO Run: DAYS_BIRTH INFO Run: CNT_FAM_MEMBERS INFO Run: NAME_INCOME_TYPE INFO Run: FLAG_EMAIL INFO Run: Age INFO Run: NAME_EDUCATION_TYPE INFO Run: AMT_INCOME_TOTAL INFO Selecting... INFO Match train: NAME_FAMILY_STATUS_asD INFO Match train: FLAG_OWN_REALTY_asD INFO Match train: DAYS_EMPLOYED_asC INFO Match train: OCCUPATION_TYPE_asD INFO Match train: NAME_INCOME_TYPE_asD INFO Creating train data... INFO Saving train data... INFO Creating train corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating train cross table... INFO Creating train all desc tables... INFO Match test: NAME_FAMILY_STATUS_asD INFO Match test: FLAG_OWN_REALTY_asD INFO Match test: DAYS_EMPLOYED_asC INFO Match test: OCCUPATION_TYPE_asD INFO Match test: NAME_INCOME_TYPE_asD INFO Creating test data... INFO Saving test data... INFO Creating test corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating test cross table... INFO Creating test all desc tables... INFO Match train test: OCCUPATION_TYPE_asD INFO Match train test: FLAG_OWN_REALTY_asD INFO Match train test: NAME_FAMILY_STATUS_asD INFO Match train test: DAYS_EMPLOYED_asC INFO Match train test: NAME_INCOME_TYPE_asD INFO Saving info... INFO Saving desc... INFO Saving cross... 
INFO Saving unique... INFO Saving summary... INFO Saving corr summary... INFO Saving stable_summary... INFO Saving train_stable_iv_ks... INFO Saving train_stable_psi... INFO Saving train_stable_quantile... INFO Saving test_stable_iv_ks... INFO Saving test_stable_psi... INFO Saving test_stable_quantile... INFO Saving stable_cross_psi... INFO Saving detail... INFO Saving draft... INFO Saving drop... INFO Saving json... INFO Saving setting... INFO Adding runtime to excel... INFO Saving directory... INFO Saving... INFO Total: 6.04 s INFO Total: 6.07 s
# v8: evaluate train-set stability with only the mean_cross_psi_train
# filter active.
at.Analysis.data_flow(
    df_train,
    "./数据分析demo/v8/v8.xlsx",
    test_data=df_test,
    response="target",  # basic definition
    split_col_name="birth_year",
    use_train_time=True,  # time slicing
    add_info="ID",  # add the primary key
    enable_single_threshold=False,  # turn off single-value filtering
    # enable_iv_limit=True, iv_threshold=[0.01, np.inf],  # IV filtering left off for now
    # Only the mean_cross_psi_train filter is enabled; the remaining
    # thresholds are set to inf / 0 (presumably so they never reject --
    # confirm against alpha_tools).
    eval_train_stable=True,
    mean_cross_psi_train=5,
    iv_gap_ratio_train=np.inf,
    ks_gap_ratio_train=np.inf,
    max_percent_gap_train=np.inf,
    monotonic_cnt_rate_train=0,
)
INFO Prepare kwargs... INFO Checking train_data... INFO Checking response... INFO Checking test_data... INFO Checking response... INFO Checking columns... INFO Checking dtypes... WARNING Setting format_time_edge to True ! INFO Prepare run... INFO Run: FLAG_MOBIL INFO Run: FLAG_PHONE INFO Run: FLAG_WORK_PHONE INFO Run: weight INFO Run: NAME_HOUSING_TYPE INFO Run: CNT_CHILDREN INFO Run: NAME_FAMILY_STATUS INFO Run: FLAG_OWN_REALTY INFO Run: DAYS_EMPLOYED INFO Run: OCCUPATION_TYPE INFO Run: FLAG_OWN_CAR INFO Run: CODE_GENDER INFO Run: DAYS_BIRTH INFO Run: CNT_FAM_MEMBERS INFO Run: NAME_INCOME_TYPE INFO Run: FLAG_EMAIL INFO Run: Age INFO Run: NAME_EDUCATION_TYPE INFO Run: AMT_INCOME_TOTAL INFO Selecting... INFO Match train: FLAG_WORK_PHONE_asC INFO Match train: CNT_CHILDREN_asC INFO Match train: FLAG_PHONE_asC INFO Match train: NAME_FAMILY_STATUS_asD INFO Match train: FLAG_OWN_REALTY_asD INFO Match train: DAYS_EMPLOYED_asC INFO Match train: OCCUPATION_TYPE_asD INFO Match train: FLAG_OWN_CAR_asD INFO Match train: CNT_FAM_MEMBERS_asC INFO Match train: CODE_GENDER_asD INFO Match train: NAME_INCOME_TYPE_asD INFO Match train: FLAG_EMAIL_asC INFO Match train: NAME_EDUCATION_TYPE_asD INFO Match train: AMT_INCOME_TOTAL_asC INFO Creating train data... INFO Saving train data... INFO Creating train corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating train cross table... INFO Creating train all desc tables... 
INFO Match test: FLAG_PHONE_asC INFO Match test: FLAG_WORK_PHONE_asC INFO Match test: CNT_CHILDREN_asC INFO Match test: NAME_FAMILY_STATUS_asD INFO Match test: FLAG_OWN_REALTY_asD INFO Match test: DAYS_EMPLOYED_asC INFO Match test: OCCUPATION_TYPE_asD INFO Match test: FLAG_OWN_CAR_asD INFO Match test: CODE_GENDER_asD INFO Match test: CNT_FAM_MEMBERS_asC INFO Match test: NAME_INCOME_TYPE_asD INFO Match test: FLAG_EMAIL_asC INFO Match test: NAME_EDUCATION_TYPE_asD INFO Match test: AMT_INCOME_TOTAL_asC INFO Creating test data... INFO Saving test data... INFO Creating test corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating test cross table... INFO Creating test all desc tables... INFO Match train test: OCCUPATION_TYPE_asD INFO Match train test: FLAG_OWN_REALTY_asD INFO Match train test: DAYS_EMPLOYED_asC INFO Match train test: NAME_FAMILY_STATUS_asD INFO Match train test: NAME_INCOME_TYPE_asD INFO Match train test: CODE_GENDER_asD INFO Match train test: NAME_EDUCATION_TYPE_asD INFO Match train test: AMT_INCOME_TOTAL_asC INFO Match train test: FLAG_WORK_PHONE_asC INFO Match train test: FLAG_OWN_CAR_asD INFO Match train test: FLAG_PHONE_asC INFO Match train test: CNT_CHILDREN_asC INFO Match train test: FLAG_EMAIL_asC INFO Match train test: CNT_FAM_MEMBERS_asC INFO Saving info... INFO Saving desc... INFO Saving cross... INFO Saving unique... INFO Saving summary... INFO Saving corr summary... INFO Saving stable_summary... INFO Saving train_stable_iv_ks... INFO Saving train_stable_psi... INFO Saving train_stable_quantile... INFO Saving test_stable_iv_ks... INFO Saving test_stable_psi... INFO Saving test_stable_quantile... INFO Saving stable_cross_psi... INFO Saving detail... INFO Saving draft... INFO Saving drop... INFO Saving json... INFO Saving setting... INFO Adding runtime to excel... INFO Saving directory... INFO Saving... 
INFO Total: 7.42 s INFO Total: 7.45 s
# v9: evaluate train-set stability using the preset (strict) thresholds.
# NOTE: the logged run ends with "WARNING Empty summary !", i.e. no
# variable is matched under these presets.
at.Analysis.data_flow(
    df_train,
    "./数据分析demo/v9/v9.xlsx",
    test_data=df_test,
    response="target",  # basic definition
    split_col_name="birth_year",
    use_train_time=True,  # time slicing
    add_info="ID",  # add the primary key
    enable_single_threshold=False,  # turn off single-value filtering
    # enable_iv_limit=True, iv_threshold=[0.01, np.inf],  # IV filtering left off for now
    # Filter strictly according to the presets.
    eval_train_stable=True,
)
INFO Prepare kwargs... INFO Checking train_data... INFO Checking response... INFO Checking test_data... INFO Checking response... INFO Checking columns... INFO Checking dtypes... WARNING Setting format_time_edge to True ! INFO Prepare run... INFO Run: FLAG_MOBIL INFO Run: FLAG_PHONE INFO Run: FLAG_WORK_PHONE INFO Run: weight INFO Run: NAME_HOUSING_TYPE INFO Run: CNT_CHILDREN INFO Run: NAME_FAMILY_STATUS INFO Run: FLAG_OWN_REALTY INFO Run: DAYS_EMPLOYED INFO Run: OCCUPATION_TYPE INFO Run: FLAG_OWN_CAR INFO Run: CODE_GENDER INFO Run: DAYS_BIRTH INFO Run: CNT_FAM_MEMBERS INFO Run: NAME_INCOME_TYPE INFO Run: FLAG_EMAIL INFO Run: Age INFO Run: NAME_EDUCATION_TYPE INFO Run: AMT_INCOME_TOTAL INFO Selecting... INFO Creating train data... INFO Saving train data... INFO Creating train corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating train cross table... INFO Creating train all desc tables... INFO Creating test data... INFO Saving test data... INFO Creating test corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating test cross table... INFO Creating test all desc tables... INFO Saving info... INFO Saving desc... INFO Saving cross... INFO Saving unique... INFO Saving summary... WARNING Empty summary ! INFO Saving corr summary... INFO Saving stable_summary... INFO Saving drop... INFO Saving json... INFO Saving setting... INFO Adding runtime to excel... INFO Saving directory... INFO Saving... INFO Total: 5.30 s INFO Total: 5.33 s
# vy: lower both the maximum number of discrete values and the upper limit
# for merged discrete values.
# NOTE: in the logged run OCCUPATION_TYPE is no longer among the matched
# variables with these limits.
at.Analysis.data_flow(
    df_train,
    "./数据分析demo/vy/vy.xlsx",
    test_data=df_test,
    response="target",
    split_col_name="birth_year",
    use_train_time=True,  # time slicing
    add_info="ID",  # add the primary key
    enable_single_threshold=False,  # turn off single-value filtering
    # Lower the max-discrete-value and max-merged-discrete-value limits together.
    max_discrete_num=15,
    auto_discrete_max=10,
)
INFO Prepare kwargs... INFO Checking train_data... INFO Checking response... INFO Checking test_data... INFO Checking response... INFO Checking columns... INFO Checking dtypes... WARNING Setting format_time_edge to True ! INFO Prepare run... INFO Run: FLAG_MOBIL INFO Run: FLAG_PHONE INFO Run: FLAG_WORK_PHONE INFO Run: weight INFO Run: NAME_HOUSING_TYPE INFO Run: CNT_CHILDREN INFO Run: NAME_FAMILY_STATUS INFO Run: FLAG_OWN_REALTY INFO Run: DAYS_EMPLOYED INFO Run: OCCUPATION_TYPE INFO Run: FLAG_OWN_CAR INFO Run: CODE_GENDER INFO Run: DAYS_BIRTH INFO Run: CNT_FAM_MEMBERS INFO Run: NAME_INCOME_TYPE INFO Run: FLAG_EMAIL INFO Run: Age INFO Run: NAME_EDUCATION_TYPE INFO Run: AMT_INCOME_TOTAL INFO Selecting... INFO Match train: FLAG_WORK_PHONE_asC INFO Match train: FLAG_PHONE_asC INFO Match train: CNT_CHILDREN_asC INFO Match train: NAME_FAMILY_STATUS_asD INFO Match train: FLAG_OWN_REALTY_asD INFO Match train: FLAG_OWN_CAR_asD INFO Match train: DAYS_EMPLOYED_asC INFO Match train: CODE_GENDER_asD INFO Match train: CNT_FAM_MEMBERS_asC INFO Match train: DAYS_BIRTH_asC INFO Match train: NAME_INCOME_TYPE_asD INFO Match train: FLAG_EMAIL_asC INFO Match train: Age_asC INFO Match train: NAME_EDUCATION_TYPE_asD INFO Match train: AMT_INCOME_TOTAL_asC INFO Creating train data... INFO Saving train data... INFO Creating train corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating train cross table... INFO Creating train all desc tables... 
INFO Match test: FLAG_PHONE_asC INFO Match test: FLAG_WORK_PHONE_asC INFO Match test: CNT_CHILDREN_asC INFO Match test: NAME_FAMILY_STATUS_asD INFO Match test: FLAG_OWN_REALTY_asD INFO Match test: DAYS_EMPLOYED_asC INFO Match test: FLAG_OWN_CAR_asD INFO Match test: CODE_GENDER_asD INFO Match test: DAYS_BIRTH_asC INFO Match test: CNT_FAM_MEMBERS_asC INFO Match test: NAME_INCOME_TYPE_asD INFO Match test: FLAG_EMAIL_asC INFO Match test: Age_asC INFO Match test: NAME_EDUCATION_TYPE_asD INFO Match test: AMT_INCOME_TOTAL_asC INFO Creating test data... INFO Saving test data... INFO Creating test corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating test cross table... INFO Creating test all desc tables... INFO Match train test: FLAG_OWN_REALTY_asD INFO Match train test: DAYS_EMPLOYED_asC INFO Match train test: NAME_FAMILY_STATUS_asD INFO Match train test: NAME_INCOME_TYPE_asD INFO Match train test: CODE_GENDER_asD INFO Match train test: NAME_EDUCATION_TYPE_asD INFO Match train test: AMT_INCOME_TOTAL_asC INFO Match train test: FLAG_WORK_PHONE_asC INFO Match train test: FLAG_OWN_CAR_asD INFO Match train test: FLAG_PHONE_asC INFO Match train test: DAYS_BIRTH_asC INFO Match train test: CNT_CHILDREN_asC INFO Match train test: Age_asC INFO Match train test: FLAG_EMAIL_asC INFO Match train test: CNT_FAM_MEMBERS_asC INFO Saving info... INFO Saving desc... INFO Saving cross... INFO Saving unique... INFO Saving summary... INFO Saving corr summary... INFO Saving stable_summary... INFO Saving train_stable_iv_ks... INFO Saving train_stable_psi... INFO Saving train_stable_quantile... INFO Saving test_stable_iv_ks... INFO Saving test_stable_psi... INFO Saving test_stable_quantile... INFO Saving stable_cross_psi... INFO Saving detail... INFO Saving draft... INFO Saving drop... INFO Saving json... INFO Saving setting... INFO Adding runtime to excel... 
INFO Saving directory... INFO Saving... INFO Total: 7.37 s INFO Total: 7.40 s
# v10: supply hand-made groupings for two variables via customized_groups:
# OCCUPATION_TYPE as three "|"-joined category lists, CNT_CHILDREN as
# numeric edges [-inf, 0, inf].
at.Analysis.data_flow(
    df_train,
    "./数据分析demo/v10/v10.xlsx",
    test_data=df_test,
    response="target",  # basic definition
    split_col_name="birth_year",
    use_train_time=True,  # time slicing
    add_info="ID",  # add the primary key
    enable_single_threshold=False,  # turn off single-value filtering
    # enable_iv_limit=True, iv_threshold=[0.01, np.inf],  # IV filtering left off for now
    customized_groups={
        "OCCUPATION_TYPE": [
            "Cleaning staff|Cooking staff|Drivers|Laborers|Low-skill Laborers|Security staff|Waiters/barmen staff",
            "Accountants|Core staff|HR staff|Medicine staff|Private service staff|Realty agents|Sales staff|Secretaries",
            "Managers|High skill tech staff|IT staff",
        ],
        "CNT_CHILDREN": [-np.inf, 0, np.inf],
    },
)
INFO Prepare kwargs... INFO Checking train_data... INFO Checking response... INFO Checking test_data... INFO Checking response... INFO Checking columns... INFO Checking dtypes... WARNING Setting format_time_edge to True ! INFO Prepare run... INFO Run: FLAG_PHONE INFO Run: FLAG_MOBIL INFO Run: FLAG_WORK_PHONE INFO Run: NAME_HOUSING_TYPE INFO Run: weight INFO Run: CNT_CHILDREN INFO Run: NAME_FAMILY_STATUS INFO Run: FLAG_OWN_REALTY INFO Run: DAYS_EMPLOYED INFO Run: OCCUPATION_TYPE INFO Run: FLAG_OWN_CAR INFO Run: CODE_GENDER INFO Run: DAYS_BIRTH INFO Run: CNT_FAM_MEMBERS INFO Run: NAME_INCOME_TYPE INFO Run: FLAG_EMAIL INFO Run: Age INFO Run: NAME_EDUCATION_TYPE INFO Run: AMT_INCOME_TOTAL INFO Selecting... INFO Match train: FLAG_PHONE_asC INFO Match train: FLAG_WORK_PHONE_asC INFO Match train: CNT_CHILDREN_asC INFO Match train: NAME_FAMILY_STATUS_asD INFO Match train: OCCUPATION_TYPE_asD INFO Match train: FLAG_OWN_REALTY_asD INFO Match train: FLAG_OWN_CAR_asD INFO Match train: DAYS_EMPLOYED_asC INFO Match train: CNT_FAM_MEMBERS_asC INFO Match train: CODE_GENDER_asD INFO Match train: DAYS_BIRTH_asC INFO Match train: FLAG_EMAIL_asC INFO Match train: NAME_INCOME_TYPE_asD INFO Match train: NAME_EDUCATION_TYPE_asD INFO Match train: Age_asC INFO Match train: AMT_INCOME_TOTAL_asC INFO Creating train data... INFO Saving train data... INFO Creating train corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating train cross table... INFO Creating train all desc tables... 
INFO Match test: FLAG_WORK_PHONE_asC INFO Match test: FLAG_PHONE_asC INFO Match test: CNT_CHILDREN_asC INFO Match test: NAME_FAMILY_STATUS_asD INFO Match test: FLAG_OWN_REALTY_asD INFO Match test: DAYS_EMPLOYED_asC INFO Match test: OCCUPATION_TYPE_asD INFO Match test: FLAG_OWN_CAR_asD INFO Match test: CODE_GENDER_asD INFO Match test: DAYS_BIRTH_asC INFO Match test: CNT_FAM_MEMBERS_asC INFO Match test: NAME_INCOME_TYPE_asD INFO Match test: FLAG_EMAIL_asC INFO Match test: Age_asC INFO Match test: NAME_EDUCATION_TYPE_asD INFO Match test: AMT_INCOME_TOTAL_asC INFO Creating test data... INFO Saving test data... INFO Creating test corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating test cross table... INFO Creating test all desc tables... INFO Match train test: FLAG_OWN_REALTY_asD INFO Match train test: DAYS_EMPLOYED_asC INFO Match train test: NAME_FAMILY_STATUS_asD INFO Match train test: NAME_INCOME_TYPE_asD INFO Match train test: CODE_GENDER_asD INFO Match train test: NAME_EDUCATION_TYPE_asD INFO Match train test: OCCUPATION_TYPE_asD INFO Match train test: AMT_INCOME_TOTAL_asC INFO Match train test: FLAG_WORK_PHONE_asC INFO Match train test: FLAG_OWN_CAR_asD INFO Match train test: FLAG_PHONE_asC INFO Match train test: DAYS_BIRTH_asC INFO Match train test: CNT_CHILDREN_asC INFO Match train test: Age_asC INFO Match train test: FLAG_EMAIL_asC INFO Match train test: CNT_FAM_MEMBERS_asC INFO Saving info... INFO Saving desc... INFO Saving cross... INFO Saving unique... INFO Saving summary... INFO Saving corr summary... INFO Saving stable_summary... INFO Saving train_stable_iv_ks... INFO Saving train_stable_psi... INFO Saving train_stable_quantile... INFO Saving test_stable_iv_ks... INFO Saving test_stable_psi... INFO Saving test_stable_quantile... INFO Saving stable_cross_psi... INFO Saving detail... INFO Saving draft... INFO Saving drop... 
INFO Saving json... INFO Saving setting... INFO Adding runtime to excel... INFO Saving directory... INFO Saving... INFO Total: 7.46 s INFO Total: 7.49 s
# Run the data_flow analysis with the "cumsum" cut method.
at.Analysis.data_flow(
    # Basic definition: train data, workbook path, test data, response column.
    df_train,
    "./数据分析demo/v9-cumsum/v9-cumsum.xlsx",
    test_data=df_test,
    response="target",
    # Time slicing on the birth_year column.
    split_col_name="birth_year",
    use_train_time=True,
    # Add the primary key.
    add_info="ID",
    # Turn off single-value filtering.
    enable_single_threshold=False,
    # Exclude the weight column (listed together with the ID key).
    exclude_column=["ID", "weight"],
    cut_method="cumsum",
)
INFO Prepare kwargs... INFO Checking train_data... INFO Checking response... INFO Checking test_data... INFO Checking response... INFO Checking columns... INFO Checking dtypes... WARNING Setting format_time_edge to True ! INFO Prepare run... INFO Run: FLAG_MOBIL INFO Run: FLAG_PHONE INFO Run: FLAG_WORK_PHONE INFO Run: CNT_CHILDREN INFO Run: NAME_HOUSING_TYPE INFO Run: NAME_FAMILY_STATUS INFO Run: FLAG_OWN_REALTY INFO Run: DAYS_EMPLOYED INFO Run: OCCUPATION_TYPE INFO Run: FLAG_OWN_CAR INFO Run: CODE_GENDER INFO Run: DAYS_BIRTH INFO Run: CNT_FAM_MEMBERS INFO Run: NAME_INCOME_TYPE INFO Run: FLAG_EMAIL INFO Run: Age INFO Run: NAME_EDUCATION_TYPE INFO Run: AMT_INCOME_TOTAL INFO Selecting... INFO Match train: FLAG_PHONE_asC INFO Match train: CNT_CHILDREN_asC INFO Match train: FLAG_WORK_PHONE_asC INFO Match train: NAME_FAMILY_STATUS_asD INFO Match train: DAYS_EMPLOYED_asC INFO Match train: FLAG_OWN_REALTY_asD INFO Match train: OCCUPATION_TYPE_asD INFO Match train: FLAG_OWN_CAR_asD INFO Match train: CODE_GENDER_asD INFO Match train: CNT_FAM_MEMBERS_asC INFO Match train: DAYS_BIRTH_asC INFO Match train: NAME_INCOME_TYPE_asD INFO Match train: FLAG_EMAIL_asC INFO Match train: NAME_EDUCATION_TYPE_asD INFO Match train: Age_asC INFO Match train: AMT_INCOME_TOTAL_asC INFO Creating train data... INFO Saving train data... INFO Creating train corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating train cross table... INFO Creating train all desc tables... 
INFO Match test: FLAG_PHONE_asC INFO Match test: FLAG_WORK_PHONE_asC INFO Match test: CNT_CHILDREN_asC INFO Match test: NAME_FAMILY_STATUS_asD INFO Match test: FLAG_OWN_REALTY_asD INFO Match test: DAYS_EMPLOYED_asC INFO Match test: OCCUPATION_TYPE_asD INFO Match test: FLAG_OWN_CAR_asD INFO Match test: CODE_GENDER_asD INFO Match test: DAYS_BIRTH_asC INFO Match test: CNT_FAM_MEMBERS_asC INFO Match test: NAME_INCOME_TYPE_asD INFO Match test: FLAG_EMAIL_asC INFO Match test: Age_asC INFO Match test: NAME_EDUCATION_TYPE_asD INFO Match test: AMT_INCOME_TOTAL_asC INFO Creating test data... INFO Saving test data... INFO Creating test corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating test cross table... INFO Creating test all desc tables... INFO Match train test: OCCUPATION_TYPE_asD INFO Match train test: FLAG_OWN_REALTY_asD INFO Match train test: DAYS_EMPLOYED_asC INFO Match train test: NAME_FAMILY_STATUS_asD INFO Match train test: NAME_INCOME_TYPE_asD INFO Match train test: CODE_GENDER_asD INFO Match train test: NAME_EDUCATION_TYPE_asD INFO Match train test: AMT_INCOME_TOTAL_asC INFO Match train test: FLAG_WORK_PHONE_asC INFO Match train test: FLAG_OWN_CAR_asD INFO Match train test: FLAG_PHONE_asC INFO Match train test: DAYS_BIRTH_asC INFO Match train test: CNT_CHILDREN_asC INFO Match train test: Age_asC INFO Match train test: FLAG_EMAIL_asC INFO Match train test: CNT_FAM_MEMBERS_asC INFO Saving info... INFO Saving desc... INFO Saving cross... INFO Saving unique... INFO Saving summary... INFO Saving corr summary... INFO Saving stable_summary... INFO Saving train_stable_iv_ks... INFO Saving train_stable_psi... INFO Saving train_stable_quantile... INFO Saving test_stable_iv_ks... INFO Saving test_stable_psi... INFO Saving test_stable_quantile... INFO Saving stable_cross_psi... INFO Saving detail... INFO Saving draft... INFO Saving drop... 
INFO Saving json... INFO Saving setting... INFO Adding runtime to excel... INFO Saving directory... INFO Saving... INFO Total: 7.63 s INFO Total: 7.66 s
# Run the data_flow analysis with the "quantile" cut method.
at.Analysis.data_flow(
    # Basic definition: train data, workbook path, test data, response column.
    df_train,
    "./数据分析demo/v9-quantile/v9-quantile.xlsx",
    test_data=df_test,
    response="target",
    # Time slicing on the birth_year column.
    split_col_name="birth_year",
    use_train_time=True,
    # Add the primary key.
    add_info="ID",
    # Turn off single-value filtering.
    enable_single_threshold=False,
    # Exclude the weight column (listed together with the ID key).
    exclude_column=["ID", "weight"],
    cut_method="quantile",
)
INFO Prepare kwargs... INFO Checking train_data... INFO Checking response... INFO Checking test_data... INFO Checking response... INFO Checking columns... INFO Checking dtypes... WARNING Setting format_time_edge to True ! INFO Prepare run... INFO Run: FLAG_MOBIL INFO Run: FLAG_PHONE INFO Run: FLAG_WORK_PHONE INFO Run: NAME_HOUSING_TYPE INFO Run: CNT_CHILDREN INFO Run: NAME_FAMILY_STATUS INFO Run: FLAG_OWN_REALTY INFO Run: DAYS_EMPLOYED INFO Run: OCCUPATION_TYPE INFO Run: FLAG_OWN_CAR INFO Run: CODE_GENDER INFO Run: DAYS_BIRTH INFO Run: CNT_FAM_MEMBERS INFO Run: NAME_INCOME_TYPE INFO Run: FLAG_EMAIL INFO Run: Age INFO Run: NAME_EDUCATION_TYPE INFO Run: AMT_INCOME_TOTAL INFO Selecting... INFO Match train: FLAG_WORK_PHONE_asC INFO Match train: FLAG_PHONE_asC INFO Match train: CNT_CHILDREN_asC INFO Match train: NAME_FAMILY_STATUS_asD INFO Match train: FLAG_OWN_REALTY_asD INFO Match train: DAYS_EMPLOYED_asC INFO Match train: OCCUPATION_TYPE_asD INFO Match train: FLAG_OWN_CAR_asD INFO Match train: CODE_GENDER_asD INFO Match train: CNT_FAM_MEMBERS_asC INFO Match train: DAYS_BIRTH_asC INFO Match train: FLAG_EMAIL_asC INFO Match train: NAME_INCOME_TYPE_asD INFO Match train: NAME_EDUCATION_TYPE_asD INFO Match train: Age_asC INFO Match train: AMT_INCOME_TOTAL_asC INFO Creating train data... INFO Saving train data... INFO Creating train corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating train cross table... INFO Creating train all desc tables... 
INFO Match test: FLAG_WORK_PHONE_asC INFO Match test: FLAG_PHONE_asC INFO Match test: CNT_CHILDREN_asC INFO Match test: NAME_FAMILY_STATUS_asD INFO Match test: FLAG_OWN_REALTY_asD INFO Match test: DAYS_EMPLOYED_asC INFO Match test: OCCUPATION_TYPE_asD INFO Match test: FLAG_OWN_CAR_asD INFO Match test: CODE_GENDER_asD INFO Match test: DAYS_BIRTH_asC INFO Match test: CNT_FAM_MEMBERS_asC INFO Match test: NAME_INCOME_TYPE_asD INFO Match test: FLAG_EMAIL_asC INFO Match test: Age_asC INFO Match test: NAME_EDUCATION_TYPE_asD INFO Match test: AMT_INCOME_TOTAL_asC INFO Creating test data... INFO Saving test data... INFO Creating test corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating test cross table... INFO Creating test all desc tables... INFO Match train test: OCCUPATION_TYPE_asD INFO Match train test: DAYS_EMPLOYED_asC INFO Match train test: FLAG_OWN_REALTY_asD INFO Match train test: NAME_FAMILY_STATUS_asD INFO Match train test: NAME_INCOME_TYPE_asD INFO Match train test: CODE_GENDER_asD INFO Match train test: NAME_EDUCATION_TYPE_asD INFO Match train test: FLAG_WORK_PHONE_asC INFO Match train test: FLAG_OWN_CAR_asD INFO Match train test: FLAG_PHONE_asC INFO Match train test: DAYS_BIRTH_asC INFO Match train test: Age_asC INFO Match train test: FLAG_EMAIL_asC INFO Match train test: AMT_INCOME_TOTAL_asC INFO Match train test: CNT_FAM_MEMBERS_asC INFO Match train test: CNT_CHILDREN_asC INFO Saving info... INFO Saving desc... INFO Saving cross... INFO Saving unique... INFO Saving summary... INFO Saving corr summary... INFO Saving stable_summary... INFO Saving train_stable_iv_ks... INFO Saving train_stable_psi... INFO Saving train_stable_quantile... INFO Saving test_stable_iv_ks... INFO Saving test_stable_psi... INFO Saving test_stable_quantile... INFO Saving stable_cross_psi... INFO Saving detail... INFO Saving draft... INFO Saving drop... 
INFO Saving json... INFO Saving setting... INFO Adding runtime to excel... INFO Saving directory... INFO Saving... INFO Total: 7.65 s INFO Total: 7.68 s
# Run the data_flow analysis with the "linspace" cut method.
at.Analysis.data_flow(
    # Basic definition: train data, workbook path, test data, response column.
    df_train,
    "./数据分析demo/v9-linspace/v9-linspace.xlsx",
    test_data=df_test,
    response="target",
    # Time slicing on the birth_year column.
    split_col_name="birth_year",
    use_train_time=True,
    # Add the primary key.
    add_info="ID",
    # Turn off single-value filtering.
    enable_single_threshold=False,
    # Exclude the weight column (listed together with the ID key).
    exclude_column=["ID", "weight"],
    cut_method="linspace",
)
INFO Prepare kwargs... INFO Checking train_data... INFO Checking response... INFO Checking test_data... INFO Checking response... INFO Checking columns... INFO Checking dtypes... WARNING Setting format_time_edge to True ! INFO Prepare run... INFO Run: FLAG_MOBIL INFO Run: FLAG_PHONE INFO Run: FLAG_WORK_PHONE INFO Run: NAME_HOUSING_TYPE INFO Run: CNT_CHILDREN INFO Run: NAME_FAMILY_STATUS INFO Run: FLAG_OWN_REALTY INFO Run: DAYS_EMPLOYED INFO Run: OCCUPATION_TYPE INFO Run: FLAG_OWN_CAR INFO Run: CODE_GENDER INFO Run: DAYS_BIRTH INFO Run: CNT_FAM_MEMBERS INFO Run: NAME_INCOME_TYPE INFO Run: FLAG_EMAIL INFO Run: Age INFO Run: NAME_EDUCATION_TYPE INFO Run: AMT_INCOME_TOTAL INFO Selecting... INFO Match train: FLAG_WORK_PHONE_asC INFO Match train: FLAG_PHONE_asC INFO Match train: FLAG_OWN_REALTY_asD INFO Match train: NAME_FAMILY_STATUS_asD INFO Match train: DAYS_EMPLOYED_asC INFO Match train: OCCUPATION_TYPE_asD INFO Match train: FLAG_OWN_CAR_asD INFO Match train: CODE_GENDER_asD INFO Match train: DAYS_BIRTH_asC INFO Match train: NAME_INCOME_TYPE_asD INFO Match train: FLAG_EMAIL_asC INFO Match train: NAME_EDUCATION_TYPE_asD INFO Match train: Age_asC INFO Creating train data... INFO Saving train data... INFO Creating train corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating train cross table... INFO Creating train all desc tables... INFO Match test: FLAG_PHONE_asC INFO Match test: FLAG_WORK_PHONE_asC INFO Match test: NAME_FAMILY_STATUS_asD INFO Match test: FLAG_OWN_REALTY_asD INFO Match test: DAYS_EMPLOYED_asC INFO Match test: OCCUPATION_TYPE_asD INFO Match test: FLAG_OWN_CAR_asD INFO Match test: CODE_GENDER_asD INFO Match test: DAYS_BIRTH_asC INFO Match test: NAME_INCOME_TYPE_asD INFO Match test: FLAG_EMAIL_asC INFO Match test: Age_asC INFO Match test: NAME_EDUCATION_TYPE_asD INFO Creating test data... INFO Saving test data... 
INFO Creating test corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating test cross table... INFO Creating test all desc tables... INFO Match train test: OCCUPATION_TYPE_asD INFO Match train test: FLAG_OWN_REALTY_asD INFO Match train test: NAME_FAMILY_STATUS_asD INFO Match train test: NAME_INCOME_TYPE_asD INFO Match train test: CODE_GENDER_asD INFO Match train test: NAME_EDUCATION_TYPE_asD INFO Match train test: DAYS_EMPLOYED_asC INFO Match train test: DAYS_BIRTH_asC INFO Match train test: FLAG_WORK_PHONE_asC INFO Match train test: FLAG_OWN_CAR_asD INFO Match train test: FLAG_PHONE_asC INFO Match train test: Age_asC INFO Match train test: FLAG_EMAIL_asC INFO Saving info... INFO Saving desc... INFO Saving cross... INFO Saving unique... INFO Saving summary... INFO Saving corr summary... INFO Saving stable_summary... INFO Saving train_stable_iv_ks... INFO Saving train_stable_psi... INFO Saving train_stable_quantile... INFO Saving test_stable_iv_ks... INFO Saving test_stable_psi... INFO Saving test_stable_quantile... INFO Saving stable_cross_psi... INFO Saving detail... INFO Saving draft... INFO Saving drop... INFO Saving json... INFO Saving setting... INFO Adding runtime to excel... INFO Saving directory... INFO Saving... INFO Total: 6.99 s INFO Total: 7.02 s
# Run the data_flow analysis with the "kmeans" cut method.
at.Analysis.data_flow(
    # Basic definition: train data, workbook path, test data, response column.
    df_train,
    "./数据分析demo/v9-kmeans/v9-kmeans.xlsx",
    test_data=df_test,
    response="target",
    # Time slicing on the birth_year column.
    split_col_name="birth_year",
    use_train_time=True,
    # Add the primary key.
    add_info="ID",
    # Turn off single-value filtering.
    enable_single_threshold=False,
    # Exclude the weight column (listed together with the ID key).
    exclude_column=["ID", "weight"],
    cut_method="kmeans",
)
INFO Prepare kwargs... INFO Checking train_data... INFO Checking response... INFO Checking test_data... INFO Checking response... INFO Checking columns... INFO Checking dtypes... WARNING Setting format_time_edge to True ! INFO Prepare run... INFO Run: FLAG_MOBIL INFO Run: FLAG_PHONE INFO Run: FLAG_WORK_PHONE INFO Run: NAME_HOUSING_TYPE INFO Run: CNT_CHILDREN INFO Run: NAME_FAMILY_STATUS INFO Run: FLAG_OWN_REALTY INFO Run: DAYS_EMPLOYED INFO Run: OCCUPATION_TYPE INFO Run: FLAG_OWN_CAR INFO Run: CODE_GENDER INFO Run: DAYS_BIRTH INFO Run: CNT_FAM_MEMBERS INFO Run: NAME_INCOME_TYPE INFO Run: FLAG_EMAIL INFO Run: Age INFO Run: NAME_EDUCATION_TYPE INFO Run: AMT_INCOME_TOTAL INFO Selecting... INFO Match train: FLAG_WORK_PHONE_asC INFO Match train: FLAG_PHONE_asC INFO Match train: NAME_HOUSING_TYPE_asD INFO Match train: CNT_CHILDREN_asC INFO Match train: FLAG_OWN_REALTY_asD INFO Match train: DAYS_EMPLOYED_asC INFO Match train: NAME_FAMILY_STATUS_asD INFO Match train: FLAG_OWN_CAR_asD INFO Match train: OCCUPATION_TYPE_asD INFO Match train: CODE_GENDER_asD INFO Match train: CNT_FAM_MEMBERS_asC INFO Match train: DAYS_BIRTH_asC INFO Match train: FLAG_EMAIL_asC INFO Match train: NAME_INCOME_TYPE_asD INFO Match train: NAME_EDUCATION_TYPE_asD INFO Match train: Age_asC INFO Match train: AMT_INCOME_TOTAL_asC INFO Creating train data... INFO Saving train data... INFO Creating train corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating train cross table... INFO Creating train all desc tables... 
INFO Match test: FLAG_PHONE_asC INFO Match test: FLAG_WORK_PHONE_asC INFO Match test: NAME_HOUSING_TYPE_asD INFO Match test: CNT_CHILDREN_asC INFO Match test: NAME_FAMILY_STATUS_asD INFO Match test: FLAG_OWN_REALTY_asD INFO Match test: DAYS_EMPLOYED_asC INFO Match test: OCCUPATION_TYPE_asD INFO Match test: FLAG_OWN_CAR_asD INFO Match test: CODE_GENDER_asD INFO Match test: DAYS_BIRTH_asC INFO Match test: CNT_FAM_MEMBERS_asC INFO Match test: NAME_INCOME_TYPE_asD INFO Match test: FLAG_EMAIL_asC INFO Match test: Age_asC INFO Match test: NAME_EDUCATION_TYPE_asD INFO Match test: AMT_INCOME_TOTAL_asC INFO Creating test data... INFO Saving test data... INFO Creating test corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating test cross table... INFO Creating test all desc tables... INFO Match train test: OCCUPATION_TYPE_asD INFO Match train test: DAYS_EMPLOYED_asC INFO Match train test: FLAG_OWN_REALTY_asD INFO Match train test: NAME_FAMILY_STATUS_asD INFO Match train test: NAME_INCOME_TYPE_asD INFO Match train test: CODE_GENDER_asD INFO Match train test: NAME_EDUCATION_TYPE_asD INFO Match train test: NAME_HOUSING_TYPE_asD INFO Match train test: FLAG_WORK_PHONE_asC INFO Match train test: FLAG_OWN_CAR_asD INFO Match train test: FLAG_PHONE_asC INFO Match train test: Age_asC INFO Match train test: DAYS_BIRTH_asC INFO Match train test: FLAG_EMAIL_asC INFO Match train test: AMT_INCOME_TOTAL_asC INFO Match train test: CNT_FAM_MEMBERS_asC INFO Match train test: CNT_CHILDREN_asC INFO Saving info... INFO Saving desc... INFO Saving cross... INFO Saving unique... INFO Saving summary... INFO Saving corr summary... INFO Saving stable_summary... INFO Saving train_stable_iv_ks... INFO Saving train_stable_psi... INFO Saving train_stable_quantile... INFO Saving test_stable_iv_ks... INFO Saving test_stable_psi... INFO Saving test_stable_quantile... 
INFO Saving stable_cross_psi... INFO Saving detail... INFO Saving draft... INFO Saving drop... INFO Saving json... INFO Saving setting... INFO Adding runtime to excel... INFO Saving directory... INFO Saving... INFO Total: 8.39 s INFO Total: 8.42 s
# Run the data_flow analysis with the "best-ks" cut method.
at.Analysis.data_flow(
    # Basic definition: train data, workbook path, test data, response column.
    df_train,
    "./数据分析demo/v9-bestks/v9-bestks.xlsx",
    test_data=df_test,
    response="target",
    # Time slicing on the birth_year column.
    split_col_name="birth_year",
    use_train_time=True,
    # Add the primary key.
    add_info="ID",
    # Turn off single-value filtering.
    enable_single_threshold=False,
    # Exclude the weight column (listed together with the ID key).
    exclude_column=["ID", "weight"],
    cut_method="best-ks",
)
INFO Prepare kwargs... INFO Checking train_data... INFO Checking response... INFO Checking test_data... INFO Checking response... INFO Checking columns... INFO Checking dtypes... WARNING Setting format_time_edge to True ! INFO Prepare run... INFO Run: FLAG_MOBIL INFO Run: FLAG_PHONE INFO Run: FLAG_WORK_PHONE INFO Run: NAME_HOUSING_TYPE INFO Run: CNT_CHILDREN INFO Run: NAME_FAMILY_STATUS INFO Run: FLAG_OWN_REALTY INFO Run: DAYS_EMPLOYED INFO Run: OCCUPATION_TYPE INFO Run: FLAG_OWN_CAR INFO Run: CODE_GENDER INFO Run: DAYS_BIRTH INFO Run: CNT_FAM_MEMBERS INFO Run: NAME_INCOME_TYPE INFO Run: FLAG_EMAIL INFO Run: Age INFO Run: NAME_EDUCATION_TYPE INFO Run: AMT_INCOME_TOTAL INFO Selecting... INFO Match train: FLAG_WORK_PHONE_asC INFO Match train: NAME_HOUSING_TYPE_asD INFO Match train: FLAG_PHONE_asC INFO Match train: CNT_CHILDREN_asC INFO Match train: FLAG_OWN_REALTY_asD INFO Match train: NAME_FAMILY_STATUS_asD INFO Match train: DAYS_EMPLOYED_asC INFO Match train: OCCUPATION_TYPE_asD INFO Match train: FLAG_OWN_CAR_asD INFO Match train: CODE_GENDER_asD INFO Match train: DAYS_BIRTH_asC INFO Match train: CNT_FAM_MEMBERS_asC INFO Match train: FLAG_EMAIL_asC INFO Match train: NAME_INCOME_TYPE_asD INFO Match train: NAME_EDUCATION_TYPE_asD INFO Match train: Age_asC INFO Match train: AMT_INCOME_TOTAL_asC INFO Creating train data... INFO Saving train data... INFO Creating train corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating train cross table... INFO Creating train all desc tables... 
INFO Match test: FLAG_PHONE_asC INFO Match test: FLAG_WORK_PHONE_asC INFO Match test: NAME_HOUSING_TYPE_asD INFO Match test: CNT_CHILDREN_asC INFO Match test: NAME_FAMILY_STATUS_asD INFO Match test: FLAG_OWN_REALTY_asD INFO Match test: DAYS_EMPLOYED_asC INFO Match test: OCCUPATION_TYPE_asD INFO Match test: FLAG_OWN_CAR_asD INFO Match test: CODE_GENDER_asD INFO Match test: DAYS_BIRTH_asC INFO Match test: CNT_FAM_MEMBERS_asC INFO Match test: NAME_INCOME_TYPE_asD INFO Match test: FLAG_EMAIL_asC INFO Match test: Age_asC INFO Match test: NAME_EDUCATION_TYPE_asD INFO Match test: AMT_INCOME_TOTAL_asC INFO Creating test data... INFO Saving test data... INFO Creating test corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating test cross table... INFO Creating test all desc tables... INFO Match train test: OCCUPATION_TYPE_asD INFO Match train test: DAYS_EMPLOYED_asC INFO Match train test: FLAG_OWN_REALTY_asD INFO Match train test: NAME_FAMILY_STATUS_asD INFO Match train test: NAME_INCOME_TYPE_asD INFO Match train test: CNT_FAM_MEMBERS_asC INFO Match train test: CODE_GENDER_asD INFO Match train test: NAME_EDUCATION_TYPE_asD INFO Match train test: DAYS_BIRTH_asC INFO Match train test: Age_asC INFO Match train test: AMT_INCOME_TOTAL_asC INFO Match train test: NAME_HOUSING_TYPE_asD INFO Match train test: FLAG_WORK_PHONE_asC INFO Match train test: FLAG_OWN_CAR_asD INFO Match train test: FLAG_PHONE_asC INFO Match train test: CNT_CHILDREN_asC INFO Match train test: FLAG_EMAIL_asC INFO Saving info... INFO Saving desc... INFO Saving cross... INFO Saving unique... INFO Saving summary... INFO Saving corr summary... INFO Saving stable_summary... INFO Saving train_stable_iv_ks... INFO Saving train_stable_psi... INFO Saving train_stable_quantile... INFO Saving test_stable_iv_ks... INFO Saving test_stable_psi... INFO Saving test_stable_quantile... 
INFO Saving stable_cross_psi... INFO Saving detail... INFO Saving draft... INFO Saving drop... INFO Saving json... INFO Saving setting... INFO Adding runtime to excel... INFO Saving directory... INFO Saving... INFO Total: 7.89 s INFO Total: 7.92 s
# Run the data_flow analysis with the "tree" cut method.
at.Analysis.data_flow(
    # Basic definition: train data, workbook path, test data, response column.
    df_train,
    "./数据分析demo/v9-tree/v9-tree.xlsx",
    test_data=df_test,
    response="target",
    # Time slicing on the birth_year column.
    split_col_name="birth_year",
    use_train_time=True,
    # Add the primary key.
    add_info="ID",
    # Turn off single-value filtering.
    enable_single_threshold=False,
    # Exclude the weight column (listed together with the ID key).
    exclude_column=["ID", "weight"],
    cut_method="tree",
)
INFO Prepare kwargs... INFO Checking train_data... INFO Checking response... INFO Checking test_data... INFO Checking response... INFO Checking columns... INFO Checking dtypes... WARNING Setting format_time_edge to True ! INFO Prepare run... INFO Run: FLAG_MOBIL INFO Run: FLAG_WORK_PHONE INFO Run: FLAG_PHONE INFO Run: CNT_CHILDREN INFO Run: NAME_HOUSING_TYPE INFO Run: NAME_FAMILY_STATUS INFO Run: FLAG_OWN_REALTY INFO Run: DAYS_EMPLOYED INFO Run: OCCUPATION_TYPE INFO Run: FLAG_OWN_CAR INFO Run: CODE_GENDER INFO Run: DAYS_BIRTH INFO Run: CNT_FAM_MEMBERS INFO Run: NAME_INCOME_TYPE INFO Run: FLAG_EMAIL INFO Run: Age INFO Run: NAME_EDUCATION_TYPE INFO Run: AMT_INCOME_TOTAL INFO Selecting... INFO Match train: FLAG_WORK_PHONE_asC INFO Match train: FLAG_PHONE_asC INFO Match train: CNT_CHILDREN_asC INFO Match train: NAME_HOUSING_TYPE_asD INFO Match train: FLAG_OWN_REALTY_asD INFO Match train: NAME_FAMILY_STATUS_asD INFO Match train: DAYS_EMPLOYED_asC INFO Match train: OCCUPATION_TYPE_asD INFO Match train: FLAG_OWN_CAR_asD INFO Match train: CODE_GENDER_asD INFO Match train: CNT_FAM_MEMBERS_asC INFO Match train: DAYS_BIRTH_asC INFO Match train: NAME_INCOME_TYPE_asD INFO Match train: FLAG_EMAIL_asC INFO Match train: NAME_EDUCATION_TYPE_asD INFO Match train: Age_asC INFO Match train: AMT_INCOME_TOTAL_asC INFO Creating train data... INFO Saving train data... INFO Creating train corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating train cross table... INFO Creating train all desc tables... 
INFO Match test: FLAG_PHONE_asC INFO Match test: FLAG_WORK_PHONE_asC INFO Match test: NAME_HOUSING_TYPE_asD INFO Match test: CNT_CHILDREN_asC INFO Match test: NAME_FAMILY_STATUS_asD INFO Match test: FLAG_OWN_REALTY_asD INFO Match test: DAYS_EMPLOYED_asC INFO Match test: OCCUPATION_TYPE_asD INFO Match test: FLAG_OWN_CAR_asD INFO Match test: CODE_GENDER_asD INFO Match test: DAYS_BIRTH_asC INFO Match test: CNT_FAM_MEMBERS_asC INFO Match test: NAME_INCOME_TYPE_asD INFO Match test: FLAG_EMAIL_asC INFO Match test: Age_asC INFO Match test: NAME_EDUCATION_TYPE_asD INFO Match test: AMT_INCOME_TOTAL_asC INFO Creating test data... INFO Saving test data... INFO Creating test corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating test cross table... INFO Creating test all desc tables... INFO Match train test: OCCUPATION_TYPE_asD INFO Match train test: DAYS_EMPLOYED_asC INFO Match train test: FLAG_OWN_REALTY_asD INFO Match train test: NAME_FAMILY_STATUS_asD INFO Match train test: NAME_INCOME_TYPE_asD INFO Match train test: CNT_FAM_MEMBERS_asC INFO Match train test: CODE_GENDER_asD INFO Match train test: DAYS_BIRTH_asC INFO Match train test: Age_asC INFO Match train test: NAME_EDUCATION_TYPE_asD INFO Match train test: AMT_INCOME_TOTAL_asC INFO Match train test: NAME_HOUSING_TYPE_asD INFO Match train test: FLAG_WORK_PHONE_asC INFO Match train test: FLAG_OWN_CAR_asD INFO Match train test: FLAG_PHONE_asC INFO Match train test: CNT_CHILDREN_asC INFO Match train test: FLAG_EMAIL_asC INFO Saving info... INFO Saving desc... INFO Saving cross... INFO Saving unique... INFO Saving summary... INFO Saving corr summary... INFO Saving stable_summary... INFO Saving train_stable_iv_ks... INFO Saving train_stable_psi... INFO Saving train_stable_quantile... INFO Saving test_stable_iv_ks... INFO Saving test_stable_psi... INFO Saving test_stable_quantile... 
INFO Saving stable_cross_psi... INFO Saving detail... INFO Saving draft... INFO Saving drop... INFO Saving json... INFO Saving setting... INFO Adding runtime to excel... INFO Saving directory... INFO Saving... INFO Total: 7.80 s INFO Total: 7.83 s
# Run the data_flow analysis with the "chi-merge" cut method.
at.Analysis.data_flow(
    # Basic definition: train data, workbook path, test data, response column.
    df_train,
    "./数据分析demo/v9-chimerge/v9-chimerge.xlsx",
    test_data=df_test,
    response="target",
    # Time slicing on the birth_year column.
    split_col_name="birth_year",
    use_train_time=True,
    # Add the primary key.
    add_info="ID",
    # Turn off single-value filtering.
    enable_single_threshold=False,
    # Exclude the weight column (listed together with the ID key).
    exclude_column=["ID", "weight"],
    cut_method="chi-merge",
)
INFO Prepare kwargs... INFO Checking train_data... INFO Checking response... INFO Checking test_data... INFO Checking response... INFO Checking columns... INFO Checking dtypes... WARNING Setting format_time_edge to True ! INFO Prepare run... INFO Run: FLAG_MOBIL INFO Run: FLAG_PHONE INFO Run: FLAG_WORK_PHONE INFO Run: CNT_CHILDREN INFO Run: NAME_HOUSING_TYPE INFO Run: NAME_FAMILY_STATUS INFO Run: FLAG_OWN_REALTY INFO Run: DAYS_EMPLOYED INFO Run: OCCUPATION_TYPE INFO Run: FLAG_OWN_CAR INFO Run: CODE_GENDER INFO Run: DAYS_BIRTH INFO Run: CNT_FAM_MEMBERS INFO Run: NAME_INCOME_TYPE INFO Run: FLAG_EMAIL INFO Run: Age INFO Run: NAME_EDUCATION_TYPE INFO Run: AMT_INCOME_TOTAL INFO Selecting... INFO Match train: FLAG_PHONE_asC INFO Match train: FLAG_WORK_PHONE_asC INFO Match train: CNT_CHILDREN_asC INFO Match train: NAME_HOUSING_TYPE_asD INFO Match train: FLAG_OWN_REALTY_asD INFO Match train: NAME_FAMILY_STATUS_asD INFO Match train: OCCUPATION_TYPE_asD INFO Match train: DAYS_EMPLOYED_asC INFO Match train: FLAG_OWN_CAR_asD INFO Match train: CODE_GENDER_asD INFO Match train: CNT_FAM_MEMBERS_asC INFO Match train: DAYS_BIRTH_asC INFO Match train: NAME_INCOME_TYPE_asD INFO Match train: FLAG_EMAIL_asC INFO Match train: NAME_EDUCATION_TYPE_asD INFO Match train: Age_asC INFO Match train: AMT_INCOME_TOTAL_asC INFO Creating train data... INFO Saving train data... INFO Creating train corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating train cross table... INFO Creating train all desc tables... 
INFO Match test: FLAG_PHONE_asC INFO Match test: FLAG_WORK_PHONE_asC INFO Match test: NAME_HOUSING_TYPE_asD INFO Match test: CNT_CHILDREN_asC INFO Match test: NAME_FAMILY_STATUS_asD INFO Match test: FLAG_OWN_REALTY_asD INFO Match test: DAYS_EMPLOYED_asC INFO Match test: OCCUPATION_TYPE_asD INFO Match test: FLAG_OWN_CAR_asD INFO Match test: CODE_GENDER_asD INFO Match test: DAYS_BIRTH_asC INFO Match test: CNT_FAM_MEMBERS_asC INFO Match test: NAME_INCOME_TYPE_asD INFO Match test: FLAG_EMAIL_asC INFO Match test: Age_asC INFO Match test: NAME_EDUCATION_TYPE_asD INFO Match test: AMT_INCOME_TOTAL_asC INFO Creating test data... INFO Saving test data... INFO Creating test corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating test cross table... INFO Creating test all desc tables... INFO Match train test: OCCUPATION_TYPE_asD INFO Match train test: FLAG_OWN_REALTY_asD INFO Match train test: NAME_INCOME_TYPE_asD INFO Match train test: NAME_FAMILY_STATUS_asD INFO Match train test: CNT_FAM_MEMBERS_asC INFO Match train test: CODE_GENDER_asD INFO Match train test: NAME_EDUCATION_TYPE_asD INFO Match train test: Age_asC INFO Match train test: AMT_INCOME_TOTAL_asC INFO Match train test: DAYS_EMPLOYED_asC INFO Match train test: DAYS_BIRTH_asC INFO Match train test: NAME_HOUSING_TYPE_asD INFO Match train test: FLAG_WORK_PHONE_asC INFO Match train test: FLAG_OWN_CAR_asD INFO Match train test: FLAG_PHONE_asC INFO Match train test: CNT_CHILDREN_asC INFO Match train test: FLAG_EMAIL_asC INFO Saving info... INFO Saving desc... INFO Saving cross... INFO Saving unique... INFO Saving summary... INFO Saving corr summary... INFO Saving stable_summary... INFO Saving train_stable_iv_ks... INFO Saving train_stable_psi... INFO Saving train_stable_quantile... INFO Saving test_stable_iv_ks... INFO Saving test_stable_psi... INFO Saving test_stable_quantile... 
INFO Saving stable_cross_psi... INFO Saving detail... INFO Saving draft... INFO Saving drop... INFO Saving json... INFO Saving setting... INFO Adding runtime to excel... INFO Saving directory... INFO Saving... INFO Total: 7.83 s INFO Total: 7.87 s
# Run the data_flow analysis with the "rate-merge" cut method.
at.Analysis.data_flow(
    # Basic definition: train data, workbook path, test data, response column.
    df_train,
    "./数据分析demo/v9-ratemerge/v9-ratemerge.xlsx",
    test_data=df_test,
    response="target",
    # Time slicing on the birth_year column.
    split_col_name="birth_year",
    use_train_time=True,
    # Add the primary key.
    add_info="ID",
    # Turn off single-value filtering.
    enable_single_threshold=False,
    # Exclude the weight column (listed together with the ID key).
    exclude_column=["ID", "weight"],
    cut_method="rate-merge",
)
INFO Prepare kwargs... INFO Checking train_data... INFO Checking response... INFO Checking test_data... INFO Checking response... INFO Checking columns... INFO Checking dtypes... WARNING Setting format_time_edge to True ! INFO Prepare run... INFO Run: FLAG_MOBIL INFO Run: FLAG_WORK_PHONE INFO Run: FLAG_PHONE INFO Run: NAME_HOUSING_TYPE INFO Run: CNT_CHILDREN INFO Run: NAME_FAMILY_STATUS INFO Run: FLAG_OWN_REALTY INFO Run: DAYS_EMPLOYED INFO Run: OCCUPATION_TYPE INFO Run: FLAG_OWN_CAR INFO Run: CODE_GENDER INFO Run: DAYS_BIRTH INFO Run: CNT_FAM_MEMBERS INFO Run: NAME_INCOME_TYPE INFO Run: FLAG_EMAIL INFO Run: Age INFO Run: NAME_EDUCATION_TYPE INFO Run: AMT_INCOME_TOTAL INFO Selecting... INFO Match train: FLAG_WORK_PHONE_asC INFO Match train: FLAG_PHONE_asC INFO Match train: NAME_HOUSING_TYPE_asD INFO Match train: CNT_CHILDREN_asC INFO Match train: FLAG_OWN_REALTY_asD INFO Match train: NAME_FAMILY_STATUS_asD INFO Match train: OCCUPATION_TYPE_asD INFO Match train: DAYS_EMPLOYED_asC INFO Match train: FLAG_OWN_CAR_asD INFO Match train: CODE_GENDER_asD INFO Match train: CNT_FAM_MEMBERS_asC INFO Match train: DAYS_BIRTH_asC INFO Match train: FLAG_EMAIL_asC INFO Match train: NAME_INCOME_TYPE_asD INFO Match train: NAME_EDUCATION_TYPE_asD INFO Match train: Age_asC INFO Match train: AMT_INCOME_TOTAL_asC INFO Creating train data... INFO Saving train data... INFO Creating train corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating train cross table... INFO Creating train all desc tables... 
INFO Match test: FLAG_WORK_PHONE_asC INFO Match test: FLAG_PHONE_asC INFO Match test: NAME_HOUSING_TYPE_asD INFO Match test: CNT_CHILDREN_asC INFO Match test: NAME_FAMILY_STATUS_asD INFO Match test: FLAG_OWN_REALTY_asD INFO Match test: DAYS_EMPLOYED_asC INFO Match test: OCCUPATION_TYPE_asD INFO Match test: FLAG_OWN_CAR_asD INFO Match test: CODE_GENDER_asD INFO Match test: DAYS_BIRTH_asC INFO Match test: CNT_FAM_MEMBERS_asC INFO Match test: NAME_INCOME_TYPE_asD INFO Match test: FLAG_EMAIL_asC INFO Match test: Age_asC INFO Match test: NAME_EDUCATION_TYPE_asD INFO Match test: AMT_INCOME_TOTAL_asC INFO Creating test data... INFO Saving test data... INFO Creating test corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating test cross table... INFO Creating test all desc tables... INFO Match train test: OCCUPATION_TYPE_asD INFO Match train test: FLAG_OWN_REALTY_asD INFO Match train test: NAME_FAMILY_STATUS_asD INFO Match train test: NAME_INCOME_TYPE_asD INFO Match train test: CNT_FAM_MEMBERS_asC INFO Match train test: CODE_GENDER_asD INFO Match train test: NAME_EDUCATION_TYPE_asD INFO Match train test: Age_asC INFO Match train test: AMT_INCOME_TOTAL_asC INFO Match train test: DAYS_EMPLOYED_asC INFO Match train test: DAYS_BIRTH_asC INFO Match train test: NAME_HOUSING_TYPE_asD INFO Match train test: FLAG_WORK_PHONE_asC INFO Match train test: FLAG_OWN_CAR_asD INFO Match train test: FLAG_PHONE_asC INFO Match train test: FLAG_EMAIL_asC INFO Match train test: CNT_CHILDREN_asC INFO Saving info... INFO Saving desc... INFO Saving cross... INFO Saving unique... INFO Saving summary... INFO Saving corr summary... INFO Saving stable_summary... INFO Saving train_stable_iv_ks... INFO Saving train_stable_psi... INFO Saving train_stable_quantile... INFO Saving test_stable_iv_ks... INFO Saving test_stable_psi... INFO Saving test_stable_quantile... 
INFO Saving stable_cross_psi... INFO Saving detail... INFO Saving draft... INFO Saving drop... INFO Saving json... INFO Saving setting... INFO Adding runtime to excel... INFO Saving directory... INFO Saving... INFO Total: 7.90 s INFO Total: 7.93 s
# Combine the eight per-algorithm "v9-*" workbooks into one comparison report.
# Per the helper's usage text: the first argument is the new workbook name, the
# second is the list of source workbooks, and the third is the display name for
# each source workbook (same order as the path list).
at.Report.create_data_report(
"./数据分析demo/v9-算法对比/v9-算法对比.xlsx",[
"./数据分析demo/v9-cumsum/v9-cumsum.xlsx", "./数据分析demo/v9-quantile/v9-quantile.xlsx",
"./数据分析demo/v9-linspace/v9-linspace.xlsx", "./数据分析demo/v9-kmeans/v9-kmeans.xlsx",
"./数据分析demo/v9-bestks/v9-bestks.xlsx", "./数据分析demo/v9-tree/v9-tree.xlsx",
"./数据分析demo/v9-chimerge/v9-chimerge.xlsx", "./数据分析demo/v9-ratemerge/v9-ratemerge.xlsx"
], ["cumsum", "quantile", "linspace", "kmeans", "bestks", "tree", "chimerge", "ratemerge"]
)
INFO Removing ./数据分析demo/v9-算法对比/v9-算法对比.xlsx... INFO Removing ./数据分析demo/v9-算法对比/v9-算法对比-REPORT-cumsum-Data.xlsx... INFO Removing ./数据分析demo/v9-算法对比/v9-算法对比-DATA-chimerge-验证-DATA_WOE.csv... INFO Removing ./数据分析demo/v9-算法对比/v9-算法对比-DATA-linspace-开发-DATA_WOE.csv... INFO Removing ./数据分析demo/v9-算法对比/v9-算法对比-DATA-quantile-开发-DATA_WOE.csv... INFO Removing ./数据分析demo/v9-算法对比/v9-算法对比-REPORT-linspace-Data.xlsx... INFO Removing ./数据分析demo/v9-算法对比/v9-算法对比-DATA-quantile-验证-DATA_WOE.csv... INFO Removing ./数据分析demo/v9-算法对比/v9-算法对比-DATA-tree-验证-DATA_WOE.csv... INFO Removing ./数据分析demo/v9-算法对比/v9-算法对比-DATA-tree-开发-DATA_WOE.csv... INFO Removing ./数据分析demo/v9-算法对比/v9-算法对比-DATA-kmeans-验证-DATA_WOE.csv... INFO Removing ./数据分析demo/v9-算法对比/v9-算法对比-DATA-chimerge-开发-DATA_WOE.csv... INFO Removing ./数据分析demo/v9-算法对比/v9-算法对比-DATA-ratemerge-开发-DATA_WOE.csv... INFO Removing ./数据分析demo/v9-算法对比/v9-算法对比-DATA-bestks-开发-DATA_WOE.csv... INFO Removing ./数据分析demo/v9-算法对比/v9-算法对比-DATA-cumsum-开发-DATA_WOE.csv... INFO Removing ./数据分析demo/v9-算法对比/v9-算法对比-DATA-ratemerge-验证-DATA_WOE.csv... INFO Removing ./数据分析demo/v9-算法对比/v9-算法对比-DATA-linspace-验证-DATA_WOE.csv... INFO Removing ./数据分析demo/v9-算法对比/v9-算法对比-REPORT-chimerge-Data.xlsx... INFO Removing ./数据分析demo/v9-算法对比/v9-算法对比-REPORT-bestks-Data.xlsx... INFO Removing ./数据分析demo/v9-算法对比/v9-算法对比-REPORT-quantile-Data.xlsx... INFO Removing ./数据分析demo/v9-算法对比/v9-算法对比-DATA-kmeans-开发-DATA_WOE.csv... INFO Removing ./数据分析demo/v9-算法对比/v9-算法对比-REPORT-kmeans-Data.xlsx... INFO Removing ./数据分析demo/v9-算法对比/v9-算法对比-DATA-cumsum-验证-DATA_WOE.csv... INFO Removing ./数据分析demo/v9-算法对比/v9-算法对比-REPORT-ratemerge-Data.xlsx... INFO Removing ./数据分析demo/v9-算法对比/v9-算法对比-DATA-bestks-验证-DATA_WOE.csv... INFO Removing ./数据分析demo/v9-算法对比/v9-算法对比-REPORT-tree-Data.xlsx... INFO Loading ./数据分析demo/v9-cumsum/Data_v9-cumsum.xlsx... INFO Loading ./数据分析demo/v9-quantile/Data_v9-quantile.xlsx... INFO Loading ./数据分析demo/v9-linspace/Data_v9-linspace.xlsx... 
INFO Loading ./数据分析demo/v9-kmeans/Data_v9-kmeans.xlsx... INFO Loading ./数据分析demo/v9-bestks/Data_v9-bestks.xlsx... INFO Loading ./数据分析demo/v9-tree/Data_v9-tree.xlsx... INFO Loading ./数据分析demo/v9-chimerge/Data_v9-chimerge.xlsx... INFO Loading ./数据分析demo/v9-ratemerge/Data_v9-ratemerge.xlsx... INFO Creating books summary... INFO Saving... INFO Creating 变量评估-分析汇总-cumsum... INFO Creating 变量评估-分析汇总-quantile... INFO Creating 变量评估-分析汇总-linspace... INFO Creating 变量评估-分析汇总-kmeans... INFO Creating 变量评估-分析汇总-bestks... INFO Creating 变量评估-分析汇总-tree... INFO Creating 变量评估-分析汇总-chimerge... INFO Creating 变量评估-分析汇总-ratemerge... INFO Creating 变量评估-分组详情-cumsum... INFO Creating 变量评估-分组详情-quantile... INFO Creating 变量评估-分组详情-linspace... INFO Creating 变量评估-分组详情-kmeans... INFO Creating 变量评估-分组详情-bestks... INFO Creating 变量评估-分组详情-tree... INFO Creating 变量评估-分组详情-chimerge... INFO Creating 变量评估-分组详情-ratemerge... INFO Creating 变量评估-细分详情-cumsum... INFO Creating 变量评估-细分详情-quantile... INFO Creating 变量评估-细分详情-linspace... INFO Creating 变量评估-细分详情-kmeans... INFO Creating 变量评估-细分详情-bestks... INFO Creating 变量评估-细分详情-tree... INFO Creating 变量评估-细分详情-chimerge... INFO Creating 变量评估-细分详情-ratemerge... INFO Creating 变量评估-稳定性汇总-开发-cumsum... INFO Creating 变量评估-稳定性汇总-开发-quantile... INFO Creating 变量评估-稳定性汇总-开发-linspace... INFO Creating 变量评估-稳定性汇总-开发-kmeans... INFO Creating 变量评估-稳定性汇总-开发-bestks... INFO Creating 变量评估-稳定性汇总-开发-tree... INFO Creating 变量评估-稳定性汇总-开发-chimerge... INFO Creating 变量评估-稳定性汇总-开发-ratemerge... INFO Creating 变量评估-稳定性汇总-验证-cumsum... INFO Creating 变量评估-稳定性汇总-验证-quantile... INFO Creating 变量评估-稳定性汇总-验证-linspace... INFO Creating 变量评估-稳定性汇总-验证-kmeans... INFO Creating 变量评估-稳定性汇总-验证-bestks... INFO Creating 变量评估-稳定性汇总-验证-tree... INFO Creating 变量评估-稳定性汇总-验证-chimerge... INFO Creating 变量评估-稳定性汇总-验证-ratemerge... INFO Saving... INFO Copy files... INFO Copying /home/conda_env/数据分析demo/v9-cumsum/Data_v9-cumsum.xlsx... INFO Copying /home/conda_env/数据分析demo/v9-quantile/Data_v9-quantile.xlsx... 
INFO Copying /home/conda_env/数据分析demo/v9-linspace/Data_v9-linspace.xlsx... INFO Copying /home/conda_env/数据分析demo/v9-kmeans/Data_v9-kmeans.xlsx... INFO Copying /home/conda_env/数据分析demo/v9-bestks/Data_v9-bestks.xlsx... INFO Copying /home/conda_env/数据分析demo/v9-tree/Data_v9-tree.xlsx... INFO Copying /home/conda_env/数据分析demo/v9-chimerge/Data_v9-chimerge.xlsx... INFO Copying /home/conda_env/数据分析demo/v9-ratemerge/Data_v9-ratemerge.xlsx... INFO Creating csv path... INFO Copying ./数据分析demo/v9-cumsum/TRAIN_DATA_WOE_v9-cumsum.csv... INFO Copying ./数据分析demo/v9-cumsum/TEST_DATA_WOE_v9-cumsum.csv... INFO Copying ./数据分析demo/v9-quantile/TRAIN_DATA_WOE_v9-quantile.csv... INFO Copying ./数据分析demo/v9-quantile/TEST_DATA_WOE_v9-quantile.csv... INFO Copying ./数据分析demo/v9-linspace/TRAIN_DATA_WOE_v9-linspace.csv... INFO Copying ./数据分析demo/v9-linspace/TEST_DATA_WOE_v9-linspace.csv... INFO Copying ./数据分析demo/v9-kmeans/TRAIN_DATA_WOE_v9-kmeans.csv... INFO Copying ./数据分析demo/v9-kmeans/TEST_DATA_WOE_v9-kmeans.csv... INFO Copying ./数据分析demo/v9-bestks/TRAIN_DATA_WOE_v9-bestks.csv... INFO Copying ./数据分析demo/v9-bestks/TEST_DATA_WOE_v9-bestks.csv... INFO Copying ./数据分析demo/v9-tree/TRAIN_DATA_WOE_v9-tree.csv... INFO Copying ./数据分析demo/v9-tree/TEST_DATA_WOE_v9-tree.csv... INFO Copying ./数据分析demo/v9-chimerge/TRAIN_DATA_WOE_v9-chimerge.csv... INFO Copying ./数据分析demo/v9-chimerge/TEST_DATA_WOE_v9-chimerge.csv... INFO Copying ./数据分析demo/v9-ratemerge/TRAIN_DATA_WOE_v9-ratemerge.csv... INFO Copying ./数据分析demo/v9-ratemerge/TEST_DATA_WOE_v9-ratemerge.csv...
(['./数据分析demo/v9-cumsum/v9-cumsum.xlsx',
'./数据分析demo/v9-quantile/v9-quantile.xlsx',
'./数据分析demo/v9-linspace/v9-linspace.xlsx',
'./数据分析demo/v9-kmeans/v9-kmeans.xlsx',
'./数据分析demo/v9-bestks/v9-bestks.xlsx',
'./数据分析demo/v9-tree/v9-tree.xlsx',
'./数据分析demo/v9-chimerge/v9-chimerge.xlsx',
'./数据分析demo/v9-ratemerge/v9-ratemerge.xlsx'],
['cumsum',
'quantile',
'linspace',
'kmeans',
'bestks',
'tree',
'chimerge',
'ratemerge'],
None,
{'./数据分析demo/v9-算法对比/v9-算法对比-DATA-cumsum-开发-DATA_WOE.csv': './数据分析demo/v9-cumsum/TRAIN_DATA_WOE_v9-cumsum.csv',
'./数据分析demo/v9-算法对比/v9-算法对比-DATA-cumsum-验证-DATA_WOE.csv': './数据分析demo/v9-cumsum/TEST_DATA_WOE_v9-cumsum.csv',
'./数据分析demo/v9-算法对比/v9-算法对比-DATA-quantile-开发-DATA_WOE.csv': './数据分析demo/v9-quantile/TRAIN_DATA_WOE_v9-quantile.csv',
'./数据分析demo/v9-算法对比/v9-算法对比-DATA-quantile-验证-DATA_WOE.csv': './数据分析demo/v9-quantile/TEST_DATA_WOE_v9-quantile.csv',
'./数据分析demo/v9-算法对比/v9-算法对比-DATA-linspace-开发-DATA_WOE.csv': './数据分析demo/v9-linspace/TRAIN_DATA_WOE_v9-linspace.csv',
'./数据分析demo/v9-算法对比/v9-算法对比-DATA-linspace-验证-DATA_WOE.csv': './数据分析demo/v9-linspace/TEST_DATA_WOE_v9-linspace.csv',
'./数据分析demo/v9-算法对比/v9-算法对比-DATA-kmeans-开发-DATA_WOE.csv': './数据分析demo/v9-kmeans/TRAIN_DATA_WOE_v9-kmeans.csv',
'./数据分析demo/v9-算法对比/v9-算法对比-DATA-kmeans-验证-DATA_WOE.csv': './数据分析demo/v9-kmeans/TEST_DATA_WOE_v9-kmeans.csv',
'./数据分析demo/v9-算法对比/v9-算法对比-DATA-bestks-开发-DATA_WOE.csv': './数据分析demo/v9-bestks/TRAIN_DATA_WOE_v9-bestks.csv',
'./数据分析demo/v9-算法对比/v9-算法对比-DATA-bestks-验证-DATA_WOE.csv': './数据分析demo/v9-bestks/TEST_DATA_WOE_v9-bestks.csv',
'./数据分析demo/v9-算法对比/v9-算法对比-DATA-tree-开发-DATA_WOE.csv': './数据分析demo/v9-tree/TRAIN_DATA_WOE_v9-tree.csv',
'./数据分析demo/v9-算法对比/v9-算法对比-DATA-tree-验证-DATA_WOE.csv': './数据分析demo/v9-tree/TEST_DATA_WOE_v9-tree.csv',
'./数据分析demo/v9-算法对比/v9-算法对比-DATA-chimerge-开发-DATA_WOE.csv': './数据分析demo/v9-chimerge/TRAIN_DATA_WOE_v9-chimerge.csv',
'./数据分析demo/v9-算法对比/v9-算法对比-DATA-chimerge-验证-DATA_WOE.csv': './数据分析demo/v9-chimerge/TEST_DATA_WOE_v9-chimerge.csv',
'./数据分析demo/v9-算法对比/v9-算法对比-DATA-ratemerge-开发-DATA_WOE.csv': './数据分析demo/v9-ratemerge/TRAIN_DATA_WOE_v9-ratemerge.csv',
'./数据分析demo/v9-算法对比/v9-算法对比-DATA-ratemerge-验证-DATA_WOE.csv': './数据分析demo/v9-ratemerge/TEST_DATA_WOE_v9-ratemerge.csv'})at.Analysis.data_flow(
# NOTE: the text before `at.Analysis.data_flow(` on the line above is the tail of
# the previous cell's displayed return value, fused onto this call by the export.
# "v9-left" run. NOTE(review): the second positional argument looks like the
# output workbook path -- confirm against the alpha_tools documentation.
df_train, "./数据分析demo/v9-left/v9-left.xlsx", test_data=df_test, response="target", # basic definitions
split_col_name="birth_year", use_train_time=True, # time slicing
add_info="ID", # add the primary key
enable_single_threshold=False, # turn off single-value filtering
# enable_iv_limit=True, iv_threshold=[0.01, np.inf], # IV filtering is not enabled for now
include_right=False, # show bins as left-closed, right-open intervals
)
INFO Prepare kwargs... INFO Checking train_data... INFO Checking response... INFO Checking test_data... INFO Checking response... INFO Checking columns... INFO Checking dtypes... WARNING Setting format_time_edge to True ! INFO Prepare run... INFO Run: FLAG_MOBIL INFO Run: FLAG_PHONE INFO Run: FLAG_WORK_PHONE INFO Run: NAME_HOUSING_TYPE INFO Run: weight INFO Run: CNT_CHILDREN INFO Run: NAME_FAMILY_STATUS INFO Run: FLAG_OWN_REALTY INFO Run: DAYS_EMPLOYED INFO Run: OCCUPATION_TYPE INFO Run: FLAG_OWN_CAR INFO Run: CODE_GENDER INFO Run: DAYS_BIRTH INFO Run: CNT_FAM_MEMBERS INFO Run: NAME_INCOME_TYPE INFO Run: FLAG_EMAIL INFO Run: Age INFO Run: NAME_EDUCATION_TYPE INFO Run: AMT_INCOME_TOTAL INFO Selecting... INFO Match train: FLAG_PHONE_asC INFO Match train: CNT_CHILDREN_asC INFO Match train: FLAG_WORK_PHONE_asC INFO Match train: NAME_FAMILY_STATUS_asD INFO Match train: FLAG_OWN_REALTY_asD INFO Match train: DAYS_EMPLOYED_asC INFO Match train: OCCUPATION_TYPE_asD INFO Match train: FLAG_OWN_CAR_asD INFO Match train: CODE_GENDER_asD INFO Match train: CNT_FAM_MEMBERS_asC INFO Match train: DAYS_BIRTH_asC INFO Match train: FLAG_EMAIL_asC INFO Match train: NAME_INCOME_TYPE_asD INFO Match train: NAME_EDUCATION_TYPE_asD INFO Match train: Age_asC INFO Match train: AMT_INCOME_TOTAL_asC INFO Creating train data... INFO Saving train data... INFO Creating train corr detail... INFO Creating (-inf, 1972-01-01) corr... INFO Creating [1972-01-01, 1984-01-01) corr... INFO Creating [1984-01-01, inf) corr... INFO Creating all corr... INFO Creating train cross table... INFO Creating train all desc tables... 
INFO Match test: FLAG_WORK_PHONE_asC INFO Match test: FLAG_PHONE_asC INFO Match test: CNT_CHILDREN_asC INFO Match test: NAME_FAMILY_STATUS_asD INFO Match test: FLAG_OWN_REALTY_asD INFO Match test: DAYS_EMPLOYED_asC INFO Match test: OCCUPATION_TYPE_asD INFO Match test: FLAG_OWN_CAR_asD INFO Match test: CODE_GENDER_asD INFO Match test: DAYS_BIRTH_asC INFO Match test: CNT_FAM_MEMBERS_asC INFO Match test: NAME_INCOME_TYPE_asD INFO Match test: FLAG_EMAIL_asC INFO Match test: Age_asC INFO Match test: NAME_EDUCATION_TYPE_asD INFO Match test: AMT_INCOME_TOTAL_asC INFO Creating test data... INFO Saving test data... INFO Creating test corr detail... INFO Creating (-inf, 1972-01-01) corr... INFO Creating [1972-01-01, 1984-01-01) corr... INFO Creating [1984-01-01, inf) corr... INFO Creating all corr... INFO Creating test cross table... INFO Creating test all desc tables... INFO Match train test: OCCUPATION_TYPE_asD INFO Match train test: FLAG_OWN_REALTY_asD INFO Match train test: DAYS_EMPLOYED_asC INFO Match train test: NAME_FAMILY_STATUS_asD INFO Match train test: NAME_INCOME_TYPE_asD INFO Match train test: CODE_GENDER_asD INFO Match train test: NAME_EDUCATION_TYPE_asD INFO Match train test: AMT_INCOME_TOTAL_asC INFO Match train test: FLAG_WORK_PHONE_asC INFO Match train test: FLAG_OWN_CAR_asD INFO Match train test: FLAG_PHONE_asC INFO Match train test: DAYS_BIRTH_asC INFO Match train test: CNT_CHILDREN_asC INFO Match train test: Age_asC INFO Match train test: FLAG_EMAIL_asC INFO Match train test: CNT_FAM_MEMBERS_asC INFO Saving info... INFO Saving desc... INFO Saving cross... INFO Saving unique... INFO Saving summary... INFO Saving corr summary... INFO Saving stable_summary... INFO Saving train_stable_iv_ks... INFO Saving train_stable_psi... INFO Saving train_stable_quantile... INFO Saving test_stable_iv_ks... INFO Saving test_stable_psi... INFO Saving test_stable_quantile... INFO Saving stable_cross_psi... INFO Saving detail... INFO Saving draft... INFO Saving drop... 
INFO Saving json... INFO Saving setting... INFO Adding runtime to excel... INFO Saving directory... INFO Saving... INFO Total: 7.68 s INFO Total: 7.72 s
# "v9-random" run: analysis of the train/test frames with random binning switched on.
# NOTE(review): the second positional argument looks like the output workbook
# path -- confirm against the alpha_tools documentation.
at.Analysis.data_flow(
df_train, "./数据分析demo/v9-random/v9-random.xlsx", test_data=df_test, response="target", # basic definitions
split_col_name="birth_year", use_train_time=True, # time slicing
add_info="ID", # add the primary key
enable_single_threshold=False, # turn off single-value filtering
# enable_iv_limit=True, iv_threshold=[0.01, np.inf], # IV filtering is not enabled for now
random_cut=True, # enable random binning
)
INFO Prepare kwargs... INFO Checking train_data... INFO Checking response... INFO Checking test_data... INFO Checking response... INFO Checking columns... INFO Checking dtypes... WARNING Setting format_time_edge to True ! INFO Prepare run... INFO Run: FLAG_MOBIL INFO Run: FLAG_PHONE INFO Run: FLAG_WORK_PHONE INFO Run: weight INFO Run: NAME_HOUSING_TYPE INFO Run: CNT_CHILDREN INFO Run: NAME_FAMILY_STATUS INFO Run: FLAG_OWN_REALTY INFO Run: DAYS_EMPLOYED INFO Run: OCCUPATION_TYPE INFO Run: FLAG_OWN_CAR INFO Run: CODE_GENDER INFO Run: DAYS_BIRTH INFO Run: CNT_FAM_MEMBERS INFO Run: NAME_INCOME_TYPE INFO Run: FLAG_EMAIL INFO Run: Age INFO Run: NAME_EDUCATION_TYPE INFO Run: AMT_INCOME_TOTAL INFO Selecting... INFO Match train: FLAG_WORK_PHONE_asC INFO Match train: FLAG_PHONE_asC INFO Match train: NAME_HOUSING_TYPE_asD INFO Match train: CNT_CHILDREN_asC INFO Match train: NAME_FAMILY_STATUS_asD INFO Match train: FLAG_OWN_REALTY_asD INFO Match train: DAYS_EMPLOYED_asC INFO Match train: FLAG_OWN_CAR_asD INFO Match train: OCCUPATION_TYPE_asD INFO Match train: CODE_GENDER_asD INFO Match train: CNT_FAM_MEMBERS_asC INFO Match train: DAYS_BIRTH_asC INFO Match train: FLAG_EMAIL_asC INFO Match train: NAME_INCOME_TYPE_asD INFO Match train: Age_asC INFO Match train: NAME_EDUCATION_TYPE_asD INFO Match train: AMT_INCOME_TOTAL_asC INFO Creating train data... INFO Saving train data... INFO Creating train corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating train cross table... INFO Creating train all desc tables... 
INFO Match test: FLAG_PHONE_asC INFO Match test: FLAG_WORK_PHONE_asC INFO Match test: NAME_HOUSING_TYPE_asD INFO Match test: CNT_CHILDREN_asC INFO Match test: NAME_FAMILY_STATUS_asD INFO Match test: FLAG_OWN_REALTY_asD INFO Match test: DAYS_EMPLOYED_asC INFO Match test: OCCUPATION_TYPE_asD INFO Match test: FLAG_OWN_CAR_asD INFO Match test: CODE_GENDER_asD INFO Match test: DAYS_BIRTH_asC INFO Match test: CNT_FAM_MEMBERS_asC INFO Match test: NAME_INCOME_TYPE_asD INFO Match test: FLAG_EMAIL_asC INFO Match test: Age_asC INFO Match test: NAME_EDUCATION_TYPE_asD INFO Match test: AMT_INCOME_TOTAL_asC INFO Creating test data... INFO Saving test data... INFO Creating test corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating test cross table... INFO Creating test all desc tables... INFO Match train test: OCCUPATION_TYPE_asD INFO Match train test: FLAG_OWN_REALTY_asD INFO Match train test: NAME_FAMILY_STATUS_asD INFO Match train test: NAME_INCOME_TYPE_asD INFO Match train test: CNT_FAM_MEMBERS_asC INFO Match train test: CODE_GENDER_asD INFO Match train test: NAME_EDUCATION_TYPE_asD INFO Match train test: Age_asC INFO Match train test: DAYS_EMPLOYED_asC INFO Match train test: NAME_HOUSING_TYPE_asD INFO Match train test: FLAG_WORK_PHONE_asC INFO Match train test: FLAG_OWN_CAR_asD INFO Match train test: FLAG_PHONE_asC INFO Match train test: DAYS_BIRTH_asC INFO Match train test: FLAG_EMAIL_asC INFO Match train test: AMT_INCOME_TOTAL_asC INFO Match train test: CNT_CHILDREN_asC INFO Saving info... INFO Saving desc... INFO Saving cross... INFO Saving unique... INFO Saving summary... INFO Saving corr summary... INFO Saving stable_summary... INFO Saving train_stable_iv_ks... INFO Saving train_stable_psi... INFO Saving train_stable_quantile... INFO Saving test_stable_iv_ks... INFO Saving test_stable_psi... INFO Saving test_stable_quantile... 
INFO Saving stable_cross_psi... INFO Saving detail... INFO Saving draft... INFO Saving drop... INFO Saving json... INFO Saving setting... INFO Adding runtime to excel... INFO Saving directory... INFO Saving... INFO Total: 8.00 s INFO Total: 8.03 s
# "v11" run: adds hand-written groupings for two columns, excludes ID/weight,
# and keeps the value 1 in a group of its own.
# NOTE(review): the second positional argument looks like the output workbook
# path -- confirm against the alpha_tools documentation.
at.Analysis.data_flow(
df_train, "./数据分析demo/v11/v11.xlsx", test_data=df_test, response="target", # basic definitions
split_col_name="birth_year", use_train_time=True, # time slicing
add_info="ID", # add the primary key
enable_single_threshold=False, # turn off single-value filtering
# enable_iv_limit=True, iv_threshold=[0.01, np.inf], # IV filtering is not enabled for now
customized_groups={
# Three occupation buckets; each string lists category names joined by "|"
# (presumably the library splits on "|" -- TODO confirm).
"OCCUPATION_TYPE": [
"Cleaning staff|Cooking staff|Drivers|Laborers|Low-skill Laborers|Security staff|Waiters/barmen staff",
"Accountants|Core staff|HR staff|Medicine staff|Private service staff|Realty agents|Sales staff|Secretaries",
"Managers|High skill tech staff|IT staff",
],
# Numeric column given as edges: a single interior cut point at 0.
"CNT_CHILDREN": [-np.inf, 0, np.inf],
},
exclude_column=["ID", "weight"], # exclude the weight column
keep_separate_value=1, # keep the value 1 as its own separate group
)
INFO Prepare kwargs... INFO Checking train_data... INFO Checking response... INFO Checking test_data... INFO Checking response... INFO Checking columns... INFO Checking dtypes... INFO Checking separate value... WARNING Setting format_time_edge to True ! INFO Prepare run... INFO Run: FLAG_MOBIL INFO Run: FLAG_PHONE INFO Run: FLAG_WORK_PHONE INFO Run: NAME_HOUSING_TYPE INFO Run: CNT_CHILDREN INFO Run: NAME_FAMILY_STATUS INFO Run: FLAG_OWN_REALTY INFO Run: DAYS_EMPLOYED INFO Run: OCCUPATION_TYPE INFO Run: FLAG_OWN_CAR INFO Run: DAYS_BIRTH INFO Run: CODE_GENDER INFO Run: CNT_FAM_MEMBERS INFO Run: NAME_INCOME_TYPE INFO Run: FLAG_EMAIL INFO Run: Age INFO Run: NAME_EDUCATION_TYPE INFO Run: AMT_INCOME_TOTAL INFO Selecting... INFO Match train: FLAG_WORK_PHONE_asC INFO Match train: FLAG_PHONE_asC INFO Match train: CNT_CHILDREN_asC INFO Match train: NAME_FAMILY_STATUS_asD INFO Match train: FLAG_OWN_REALTY_asD INFO Match train: OCCUPATION_TYPE_asD INFO Match train: DAYS_EMPLOYED_asC INFO Match train: FLAG_OWN_CAR_asD INFO Match train: CODE_GENDER_asD INFO Match train: DAYS_BIRTH_asC INFO Match train: CNT_FAM_MEMBERS_asC INFO Match train: NAME_INCOME_TYPE_asD INFO Match train: FLAG_EMAIL_asC INFO Match train: NAME_EDUCATION_TYPE_asD INFO Match train: Age_asC INFO Match train: AMT_INCOME_TOTAL_asC INFO Creating train data... INFO Saving train data... INFO Creating train corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating train cross table... INFO Creating train all desc tables... 
INFO Match test: FLAG_WORK_PHONE_asC INFO Match test: FLAG_PHONE_asC INFO Match test: CNT_CHILDREN_asC INFO Match test: NAME_FAMILY_STATUS_asD INFO Match test: FLAG_OWN_REALTY_asD INFO Match test: DAYS_EMPLOYED_asC INFO Match test: OCCUPATION_TYPE_asD INFO Match test: FLAG_OWN_CAR_asD INFO Match test: CODE_GENDER_asD INFO Match test: DAYS_BIRTH_asC INFO Match test: CNT_FAM_MEMBERS_asC INFO Match test: NAME_INCOME_TYPE_asD INFO Match test: FLAG_EMAIL_asC INFO Match test: Age_asC INFO Match test: NAME_EDUCATION_TYPE_asD INFO Match test: AMT_INCOME_TOTAL_asC INFO Creating test data... INFO Saving test data... INFO Creating test corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating test cross table... INFO Creating test all desc tables... INFO Match train test: FLAG_OWN_REALTY_asD INFO Match train test: DAYS_EMPLOYED_asC INFO Match train test: NAME_FAMILY_STATUS_asD INFO Match train test: CNT_FAM_MEMBERS_asC INFO Match train test: NAME_INCOME_TYPE_asD INFO Match train test: CODE_GENDER_asD INFO Match train test: NAME_EDUCATION_TYPE_asD INFO Match train test: OCCUPATION_TYPE_asD INFO Match train test: AMT_INCOME_TOTAL_asC INFO Match train test: FLAG_WORK_PHONE_asC INFO Match train test: FLAG_OWN_CAR_asD INFO Match train test: FLAG_PHONE_asC INFO Match train test: DAYS_BIRTH_asC INFO Match train test: CNT_CHILDREN_asC INFO Match train test: Age_asC INFO Match train test: FLAG_EMAIL_asC INFO Saving info... INFO Saving desc... INFO Saving cross... INFO Saving unique... INFO Saving summary... INFO Saving corr summary... INFO Saving stable_summary... INFO Saving train_stable_iv_ks... INFO Saving train_stable_psi... INFO Saving train_stable_quantile... INFO Saving test_stable_iv_ks... INFO Saving test_stable_psi... INFO Saving test_stable_quantile... INFO Saving stable_cross_psi... INFO Saving detail... INFO Saving draft... INFO Saving drop... 
INFO Saving json... INFO Saving setting... INFO Adding runtime to excel... INFO Saving directory... INFO Saving... INFO Total: 7.62 s INFO Total: 7.65 s
# "v11-tree" run: hand-written groupings, ID/weight excluded, value 1 kept in
# its own group, and cut_method set to "tree".
# NOTE(review): the second positional argument looks like the output workbook
# path -- confirm against the alpha_tools documentation.
at.Analysis.data_flow(
df_train, "./数据分析demo/v11-tree/v11-tree.xlsx", test_data=df_test, response="target", # basic definitions
split_col_name="birth_year", use_train_time=True, # time slicing
add_info="ID", # add the primary key
enable_single_threshold=False, # turn off single-value filtering
# enable_iv_limit=True, iv_threshold=[0.01, np.inf], # IV filtering is not enabled for now
customized_groups={
# Three occupation buckets; each string lists category names joined by "|"
# (presumably the library splits on "|" -- TODO confirm).
"OCCUPATION_TYPE": [
"Cleaning staff|Cooking staff|Drivers|Laborers|Low-skill Laborers|Security staff|Waiters/barmen staff",
"Accountants|Core staff|HR staff|Medicine staff|Private service staff|Realty agents|Sales staff|Secretaries",
"Managers|High skill tech staff|IT staff",
],
# Numeric column given as edges: a single interior cut point at 0.
"CNT_CHILDREN": [-np.inf, 0, np.inf],
},
exclude_column=["ID", "weight"], # exclude the weight column
keep_separate_value=1, # keep the value 1 as its own separate group
cut_method="tree", # try a different algorithm
)
INFO Prepare kwargs... INFO Checking train_data... INFO Checking response... INFO Checking test_data... INFO Checking response... INFO Checking columns... INFO Checking dtypes... INFO Checking separate value... WARNING Setting format_time_edge to True ! INFO Prepare run... INFO Run: FLAG_MOBIL INFO Run: FLAG_WORK_PHONE INFO Run: FLAG_PHONE INFO Run: NAME_HOUSING_TYPE INFO Run: CNT_CHILDREN INFO Run: NAME_FAMILY_STATUS INFO Run: FLAG_OWN_REALTY INFO Run: DAYS_EMPLOYED INFO Run: OCCUPATION_TYPE INFO Run: FLAG_OWN_CAR INFO Run: CODE_GENDER INFO Run: DAYS_BIRTH INFO Run: CNT_FAM_MEMBERS INFO Run: FLAG_EMAIL INFO Run: NAME_INCOME_TYPE INFO Run: Age INFO Run: NAME_EDUCATION_TYPE INFO Run: AMT_INCOME_TOTAL INFO Selecting... INFO Match train: CNT_CHILDREN_asC INFO Match train: FLAG_OWN_REALTY_asD INFO Match train: NAME_HOUSING_TYPE_asD INFO Match train: NAME_FAMILY_STATUS_asD INFO Match train: OCCUPATION_TYPE_asD INFO Match train: FLAG_OWN_CAR_asD INFO Match train: DAYS_EMPLOYED_asC INFO Match train: CODE_GENDER_asD INFO Match train: CNT_FAM_MEMBERS_asC INFO Match train: DAYS_BIRTH_asC INFO Match train: NAME_INCOME_TYPE_asD INFO Match train: Age_asC INFO Match train: NAME_EDUCATION_TYPE_asD INFO Match train: AMT_INCOME_TOTAL_asC INFO Creating train data... INFO Saving train data... INFO Creating train corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating train cross table... INFO Creating train all desc tables... 
INFO Match test: CNT_CHILDREN_asC INFO Match test: NAME_HOUSING_TYPE_asD INFO Match test: FLAG_OWN_REALTY_asD INFO Match test: NAME_FAMILY_STATUS_asD INFO Match test: DAYS_EMPLOYED_asC INFO Match test: OCCUPATION_TYPE_asD INFO Match test: FLAG_OWN_CAR_asD INFO Match test: CODE_GENDER_asD INFO Match test: DAYS_BIRTH_asC INFO Match test: CNT_FAM_MEMBERS_asC INFO Match test: NAME_INCOME_TYPE_asD INFO Match test: Age_asC INFO Match test: NAME_EDUCATION_TYPE_asD INFO Match test: AMT_INCOME_TOTAL_asC INFO Creating test data... INFO Saving test data... INFO Creating test corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating test cross table... INFO Creating test all desc tables... INFO Match train test: DAYS_EMPLOYED_asC INFO Match train test: FLAG_OWN_REALTY_asD INFO Match train test: NAME_FAMILY_STATUS_asD INFO Match train test: CNT_FAM_MEMBERS_asC INFO Match train test: NAME_INCOME_TYPE_asD INFO Match train test: CODE_GENDER_asD INFO Match train test: DAYS_BIRTH_asC INFO Match train test: Age_asC INFO Match train test: NAME_EDUCATION_TYPE_asD INFO Match train test: AMT_INCOME_TOTAL_asC INFO Match train test: OCCUPATION_TYPE_asD INFO Match train test: NAME_HOUSING_TYPE_asD INFO Match train test: FLAG_OWN_CAR_asD INFO Match train test: CNT_CHILDREN_asC INFO Saving info... INFO Saving desc... INFO Saving cross... INFO Saving unique... INFO Saving summary... INFO Saving corr summary... INFO Saving stable_summary... INFO Saving train_stable_iv_ks... INFO Saving train_stable_psi... INFO Saving train_stable_quantile... INFO Saving test_stable_iv_ks... INFO Saving test_stable_psi... INFO Saving test_stable_quantile... INFO Saving stable_cross_psi... INFO Saving detail... INFO Saving draft... INFO Saving drop... INFO Saving json... INFO Saving setting... INFO Adding runtime to excel... INFO Saving directory... INFO Saving... 
INFO Total: 7.05 s INFO Total: 7.08 s
# "v12" run: hand-written groupings, ID/weight excluded, min_group_percent set to 0.
# NOTE(review): the second positional argument looks like the output workbook
# path -- confirm against the alpha_tools documentation.
at.Analysis.data_flow(
df_train, "./数据分析demo/v12/v12.xlsx", test_data=df_test, response="target", # basic definitions
split_col_name="birth_year", use_train_time=True, # time slicing
add_info="ID", # add the primary key
enable_single_threshold=False, # turn off single-value filtering
# enable_iv_limit=True, iv_threshold=[0.01, np.inf], # IV filtering is not enabled for now
customized_groups={
# Three occupation buckets; each string lists category names joined by "|"
# (presumably the library splits on "|" -- TODO confirm).
"OCCUPATION_TYPE": [
"Cleaning staff|Cooking staff|Drivers|Laborers|Low-skill Laborers|Security staff|Waiters/barmen staff",
"Accountants|Core staff|HR staff|Medicine staff|Private service staff|Realty agents|Sales staff|Secretaries",
"Managers|High skill tech staff|IT staff",
],
# Numeric column given as edges: a single interior cut point at 0.
"CNT_CHILDREN": [-np.inf, 0, np.inf],
},
exclude_column=["ID", "weight"], # exclude the weight column
min_group_percent=0, # NOTE(review): presumably lifts the minimum group-size constraint -- confirm
)
INFO Prepare kwargs... INFO Checking train_data... INFO Checking response... INFO Checking test_data... INFO Checking response... INFO Checking columns... INFO Checking dtypes... WARNING Setting format_time_edge to True ! INFO Prepare run... INFO Run: FLAG_MOBIL INFO Run: FLAG_PHONE INFO Run: FLAG_WORK_PHONE INFO Run: NAME_HOUSING_TYPE INFO Run: CNT_CHILDREN INFO Run: NAME_FAMILY_STATUS INFO Run: FLAG_OWN_REALTY INFO Run: DAYS_EMPLOYED INFO Run: OCCUPATION_TYPE INFO Run: FLAG_OWN_CAR INFO Run: DAYS_BIRTH INFO Run: CODE_GENDER INFO Run: CNT_FAM_MEMBERS INFO Run: NAME_INCOME_TYPE INFO Run: FLAG_EMAIL INFO Run: Age INFO Run: NAME_EDUCATION_TYPE INFO Run: AMT_INCOME_TOTAL INFO Selecting... INFO Match train: FLAG_WORK_PHONE_asC INFO Match train: FLAG_PHONE_asC INFO Match train: CNT_CHILDREN_asC INFO Match train: NAME_HOUSING_TYPE_asD INFO Match train: FLAG_OWN_REALTY_asD INFO Match train: NAME_FAMILY_STATUS_asD INFO Match train: DAYS_EMPLOYED_asC INFO Match train: OCCUPATION_TYPE_asD INFO Match train: FLAG_OWN_CAR_asD INFO Match train: CODE_GENDER_asD INFO Match train: CNT_FAM_MEMBERS_asC INFO Match train: DAYS_BIRTH_asC INFO Match train: NAME_INCOME_TYPE_asD INFO Match train: FLAG_EMAIL_asC INFO Match train: NAME_EDUCATION_TYPE_asD INFO Match train: Age_asC INFO Match train: AMT_INCOME_TOTAL_asC INFO Creating train data... INFO Saving train data... INFO Creating train corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating train cross table... INFO Creating train all desc tables... 
INFO Match test: FLAG_WORK_PHONE_asC INFO Match test: FLAG_PHONE_asC INFO Match test: NAME_HOUSING_TYPE_asD INFO Match test: CNT_CHILDREN_asC INFO Match test: NAME_FAMILY_STATUS_asD INFO Match test: FLAG_OWN_REALTY_asD INFO Match test: DAYS_EMPLOYED_asC INFO Match test: OCCUPATION_TYPE_asD INFO Match test: FLAG_OWN_CAR_asD INFO Match test: CODE_GENDER_asD INFO Match test: DAYS_BIRTH_asC INFO Match test: CNT_FAM_MEMBERS_asC INFO Match test: NAME_INCOME_TYPE_asD INFO Match test: FLAG_EMAIL_asC INFO Match test: Age_asC INFO Match test: NAME_EDUCATION_TYPE_asD INFO Match test: AMT_INCOME_TOTAL_asC INFO Creating test data... INFO Saving test data... INFO Creating test corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating test cross table... INFO Creating test all desc tables... INFO Match train test: FLAG_OWN_REALTY_asD INFO Match train test: NAME_FAMILY_STATUS_asD INFO Match train test: DAYS_EMPLOYED_asC INFO Match train test: NAME_INCOME_TYPE_asD INFO Match train test: NAME_HOUSING_TYPE_asD INFO Match train test: NAME_EDUCATION_TYPE_asD INFO Match train test: CODE_GENDER_asD INFO Match train test: OCCUPATION_TYPE_asD INFO Match train test: AMT_INCOME_TOTAL_asC INFO Match train test: FLAG_WORK_PHONE_asC INFO Match train test: FLAG_OWN_CAR_asD INFO Match train test: FLAG_PHONE_asC INFO Match train test: DAYS_BIRTH_asC INFO Match train test: CNT_CHILDREN_asC INFO Match train test: Age_asC INFO Match train test: FLAG_EMAIL_asC INFO Match train test: CNT_FAM_MEMBERS_asC INFO Saving info... INFO Saving desc... INFO Saving cross... INFO Saving unique... INFO Saving summary... INFO Saving corr summary... INFO Saving stable_summary... INFO Saving train_stable_iv_ks... INFO Saving train_stable_psi... INFO Saving train_stable_quantile... INFO Saving test_stable_iv_ks... INFO Saving test_stable_psi... INFO Saving test_stable_quantile... 
INFO Saving stable_cross_psi... INFO Saving detail... INFO Saving draft... INFO Saving drop... INFO Saving json... INFO Saving setting... INFO Adding runtime to excel... INFO Saving directory... INFO Saving... INFO Total: 7.66 s INFO Total: 7.69 s
# Print the built-in usage text for at.Report.create_data_report.
help(at.Report.create_data_report)
Help on function in module alpha_tools:
create_data_report(new_excel_name, excel_path_list, excel_name_list, train_test_dir_cn=None)
Usage:
---------
create_data_report(
"xxx.xlsx", [
"file1.xlsx", "file2.xlsx", ...
], [
"filename1", "filename2", ...
], ["train_display_name", "test_display_name"]
)
---------
# Print the built-in usage text for at.Report.create_data_report2.
help(at.Report.create_data_report2)
Help on function in module alpha_tools:
create_data_report2(new_excel_name, output_name, train_name, test_names)
Usage:
---------
Example1:
---------
create_data_report2(
"xxx.xlsx", "report.xlsx", "train", ["test1", "test2", ...]
)
---------
Example2:
---------
create_data_report2(
"xxx.xlsx", {
"name1": "report1.xlsx",
"name2": "report2.xlsx",
...
}, "train", ["test1", "test2", ...]
)
---------
import json
# Pretty-print the settings object at.dt.__excel__ as 4-space-indented JSON.
# ensure_ascii=False keeps the non-ASCII (Chinese) strings readable instead of
# escaping them as \uXXXX sequences.
print(json.dumps(at.dt.__excel__, ensure_ascii=False, indent=4))
{
"header": {
"bold": true,
"font_name": "Times New Roman",
"italic": true,
"bg_color": "#000000",
"font_color": "#FFFFFF",
"align": "center",
"valign": "vdistributed",
"border": true,
"border_color": "#FFFFFF"
},
"font_name_upper": {
"Times New Roman": 1.5
},
"range_color_dict": {
"type": "3_color_scale"
},
"col_color_dict": {
"type": "data_bar"
},
"logging_excel_format": false,
"freeze_table_panes": true,
"row_space": 3,
"text_wrap_trigger": 36,
"text_wrap_format": {
"text_wrap": true,
"align": "center",
"valign": "vdistributed"
},
"desc_height": 6,
"desc_chart_style": 10,
"desc_num": 3,
"text": [
"var",
"No"
],
"0.00%": [
"percent",
"rate",
"ratio"
],
"X,XX0": [
"count",
"total",
"negatives",
"positives"
],
"0.00": [
"woe",
"iv",
"ks",
"max_percent_gap",
"psi",
"odds"
],
"X,XX0.00": [
"std",
"mean",
"median",
"coef",
"values",
"chi2",
"std_err",
"%"
],
"gbid_name": [
"good",
"bad",
"ind",
"default"
],
"train_test_suf": [
"_train",
"_test"
],
"psi_invalid_mask": 0.0001,
"else_txt": "ELSE?",
"quantile_point": [
0.0,
0.01,
0.05,
0.15,
0.25,
0.35,
0.45,
0.5,
0.55,
0.65,
0.75,
0.85,
0.95,
0.99,
1.0
],
"directory": {
"data_tools": {
"dir": "目录",
"desc": "整体评估 - 数据描述",
"cross": "整体评估 - 交叉分布",
"unique": "整体评估 - 数据唯一性",
"summary": "变量评估 - 分析汇总",
"cut_method": "变量评估 - 分箱算法",
"woe_corr": "变量评估 - 相关性列表",
"ori_corr": "变量评估 - 相关性列表",
"stable_summary": "变量评估 - 稳定性汇总",
"stable_iv_ks": "变量评估 - 排序稳定性",
"stable_psi": "变量评估 - 分布稳定性",
"stable_cross_psi": "变量评估 - 分布稳定性 - 对比",
"stable_quantile": "变量评估 - 分位数稳定性",
"detail": "变量评估 - 分组详情",
"draft": "变量评估 - 细分详情",
"drop": "变量评估 - 排除详情",
"drop_count": "变量评估 - 排除统计",
"var_name": "变量评估 - 翻译"
}
},
"dir_setting": {
"display_as_sheetname": false,
"ignore_sheetname": [
"setting",
"runtime"
],
"save_order": [
"目录",
"整体评估",
"变量评估",
"规则评估",
"模型评估",
"模型应用",
"模型部署"
],
"train_test_dir_cn": [
"开发",
"验证"
],
"start_row_col": [
2,
1
]
},
"report_setting": {
"data_tools": {
"train_flag": [
"TRAIN",
"_train.csv"
],
"test_flag": [
"TEST",
"_test.csv"
],
"auto_remove_duplicates": false,
"copy_report_files": true,
"summary_precision": 2,
"summary_range": [
"iv",
"ks",
"iv_gap_ratio",
"ks_gap_ratio",
"mean_cross_psi",
"train_test_total_psi"
],
"sheetname_range": [
"summary",
"detail",
"draft",
"train_stable_summary",
"test_stable_summary"
],
"report_files_mark": [
"RAW_DATA",
"DATA_WOE",
"DATA_ORI"
]
}
}
}
# Run the train/test analysis flow and write the report to v12-raw.xlsx.
# This variant sets save_raw_dataframe=True; the log that follows shows the
# extra "Parsing raw data / Saving train raw data / Saving test raw data" steps.
at.Analysis.data_flow(
df_train, "./数据分析demo/v12-raw/v12-raw.xlsx", test_data=df_test, response="target", # basic definitions
split_col_name="birth_year", use_train_time=True, # time slicing (the log shows per-birth_year-range corr tables)
add_info="ID", # add the primary key
enable_single_threshold=False, # disable single-value filtering
# enable_iv_limit=True, iv_threshold=[0.01, np.inf], # IV filtering is not enabled for now
# Hand-specified groupings: pipe-joined category groups for OCCUPATION_TYPE,
# and explicit bin edges for CNT_CHILDREN.
customized_groups={
"OCCUPATION_TYPE": [
"Cleaning staff|Cooking staff|Drivers|Laborers|Low-skill Laborers|Security staff|Waiters/barmen staff",
"Accountants|Core staff|HR staff|Medicine staff|Private service staff|Realty agents|Sales staff|Secretaries",
"Managers|High skill tech staff|IT staff",
],
"CNT_CHILDREN": [-np.inf, 0, np.inf],
},
exclude_column=["ID", "weight"], # exclude the weight column
min_group_percent=0,
save_raw_dataframe=True,
)
INFO Prepare kwargs... INFO Checking train_data... INFO Checking response... INFO Checking test_data... INFO Checking response... INFO Checking columns... INFO Checking dtypes... WARNING Setting format_time_edge to True ! INFO Prepare run... INFO Run: FLAG_PHONE INFO Run: FLAG_MOBIL INFO Run: FLAG_WORK_PHONE INFO Run: NAME_HOUSING_TYPE INFO Run: CNT_CHILDREN INFO Run: NAME_FAMILY_STATUS INFO Run: FLAG_OWN_REALTY INFO Run: DAYS_EMPLOYED INFO Run: OCCUPATION_TYPE INFO Run: FLAG_OWN_CAR INFO Run: CODE_GENDER INFO Run: DAYS_BIRTH INFO Run: CNT_FAM_MEMBERS INFO Run: NAME_INCOME_TYPE INFO Run: FLAG_EMAIL INFO Run: Age INFO Run: NAME_EDUCATION_TYPE INFO Run: AMT_INCOME_TOTAL INFO Selecting... INFO Match train: FLAG_WORK_PHONE_asC INFO Match train: FLAG_PHONE_asC INFO Match train: NAME_HOUSING_TYPE_asD INFO Match train: CNT_CHILDREN_asC INFO Match train: NAME_FAMILY_STATUS_asD INFO Match train: OCCUPATION_TYPE_asD INFO Match train: FLAG_OWN_REALTY_asD INFO Match train: DAYS_EMPLOYED_asC INFO Match train: FLAG_OWN_CAR_asD INFO Match train: CODE_GENDER_asD INFO Match train: CNT_FAM_MEMBERS_asC INFO Match train: DAYS_BIRTH_asC INFO Match train: NAME_INCOME_TYPE_asD INFO Match train: FLAG_EMAIL_asC INFO Match train: NAME_EDUCATION_TYPE_asD INFO Match train: Age_asC INFO Match train: AMT_INCOME_TOTAL_asC INFO Creating train data... INFO Saving train data... INFO Creating train corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating train cross table... INFO Creating train all desc tables... 
INFO Match test: FLAG_PHONE_asC INFO Match test: FLAG_WORK_PHONE_asC INFO Match test: NAME_HOUSING_TYPE_asD INFO Match test: CNT_CHILDREN_asC INFO Match test: NAME_FAMILY_STATUS_asD INFO Match test: FLAG_OWN_REALTY_asD INFO Match test: DAYS_EMPLOYED_asC INFO Match test: OCCUPATION_TYPE_asD INFO Match test: FLAG_OWN_CAR_asD INFO Match test: CODE_GENDER_asD INFO Match test: DAYS_BIRTH_asC INFO Match test: CNT_FAM_MEMBERS_asC INFO Match test: NAME_INCOME_TYPE_asD INFO Match test: FLAG_EMAIL_asC INFO Match test: Age_asC INFO Match test: NAME_EDUCATION_TYPE_asD INFO Match test: AMT_INCOME_TOTAL_asC INFO Creating test data... INFO Saving test data... INFO Creating test corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating test cross table... INFO Creating test all desc tables... INFO Match train test: FLAG_OWN_REALTY_asD INFO Match train test: NAME_FAMILY_STATUS_asD INFO Match train test: DAYS_EMPLOYED_asC INFO Match train test: NAME_INCOME_TYPE_asD INFO Match train test: NAME_HOUSING_TYPE_asD INFO Match train test: NAME_EDUCATION_TYPE_asD INFO Match train test: CODE_GENDER_asD INFO Match train test: OCCUPATION_TYPE_asD INFO Match train test: AMT_INCOME_TOTAL_asC INFO Match train test: FLAG_WORK_PHONE_asC INFO Match train test: FLAG_OWN_CAR_asD INFO Match train test: FLAG_PHONE_asC INFO Match train test: DAYS_BIRTH_asC INFO Match train test: CNT_CHILDREN_asC INFO Match train test: Age_asC INFO Match train test: FLAG_EMAIL_asC INFO Match train test: CNT_FAM_MEMBERS_asC INFO Saving info... INFO Saving desc... INFO Saving cross... INFO Saving unique... INFO Saving summary... INFO Saving corr summary... INFO Saving stable_summary... INFO Saving train_stable_iv_ks... INFO Saving train_stable_psi... INFO Saving train_stable_quantile... INFO Saving test_stable_iv_ks... INFO Saving test_stable_psi... INFO Saving test_stable_quantile... 
INFO Saving stable_cross_psi... INFO Saving detail... INFO Saving draft... INFO Saving drop... INFO Saving json... INFO Saving setting... INFO Adding runtime to excel... INFO Saving directory... INFO Saving... INFO Parsing raw data... INFO Saving train raw data... INFO Saving test raw data... INFO Total: 8.03 s INFO Total: 8.07 s
# Run the same analysis flow for the v12-return report with save_or_return=False.
# In the log that follows there is no "Saving train data" / "Saving test data"
# step, and the next cell reads the resulting frames from at.dt.__data__.
at.Analysis.data_flow(
df_train, "./数据分析demo/v12-return/v12-return.xlsx", test_data=df_test, response="target", # basic definitions
split_col_name="birth_year", use_train_time=True, # time slicing (the log shows per-birth_year-range corr tables)
add_info="ID", # add the primary key
enable_single_threshold=False, # disable single-value filtering
# enable_iv_limit=True, iv_threshold=[0.01, np.inf], # IV filtering is not enabled for now
# Hand-specified groupings: pipe-joined category groups for OCCUPATION_TYPE,
# and explicit bin edges for CNT_CHILDREN.
customized_groups={
"OCCUPATION_TYPE": [
"Cleaning staff|Cooking staff|Drivers|Laborers|Low-skill Laborers|Security staff|Waiters/barmen staff",
"Accountants|Core staff|HR staff|Medicine staff|Private service staff|Realty agents|Sales staff|Secretaries",
"Managers|High skill tech staff|IT staff",
],
"CNT_CHILDREN": [-np.inf, 0, np.inf],
},
exclude_column=["ID", "weight"], # exclude the weight column
min_group_percent=0,
save_or_return=False,
)
INFO Prepare kwargs... INFO Checking train_data... INFO Checking response... INFO Checking test_data... INFO Checking response... INFO Checking columns... INFO Checking dtypes... WARNING Setting format_time_edge to True ! INFO Prepare run... INFO Run: FLAG_MOBIL INFO Run: FLAG_PHONE INFO Run: FLAG_WORK_PHONE INFO Run: NAME_HOUSING_TYPE INFO Run: CNT_CHILDREN INFO Run: NAME_FAMILY_STATUS INFO Run: FLAG_OWN_REALTY INFO Run: DAYS_EMPLOYED INFO Run: OCCUPATION_TYPE INFO Run: FLAG_OWN_CAR INFO Run: DAYS_BIRTH INFO Run: CODE_GENDER INFO Run: CNT_FAM_MEMBERS INFO Run: NAME_INCOME_TYPE INFO Run: FLAG_EMAIL INFO Run: Age INFO Run: NAME_EDUCATION_TYPE INFO Run: AMT_INCOME_TOTAL INFO Selecting... INFO Match train: FLAG_WORK_PHONE_asC INFO Match train: FLAG_PHONE_asC INFO Match train: NAME_HOUSING_TYPE_asD INFO Match train: CNT_CHILDREN_asC INFO Match train: FLAG_OWN_REALTY_asD INFO Match train: NAME_FAMILY_STATUS_asD INFO Match train: OCCUPATION_TYPE_asD INFO Match train: FLAG_OWN_CAR_asD INFO Match train: DAYS_EMPLOYED_asC INFO Match train: CODE_GENDER_asD INFO Match train: DAYS_BIRTH_asC INFO Match train: CNT_FAM_MEMBERS_asC INFO Match train: NAME_INCOME_TYPE_asD INFO Match train: FLAG_EMAIL_asC INFO Match train: NAME_EDUCATION_TYPE_asD INFO Match train: Age_asC INFO Match train: AMT_INCOME_TOTAL_asC INFO Creating train data... INFO Creating train corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating train cross table... INFO Creating train all desc tables... 
INFO Match test: FLAG_WORK_PHONE_asC INFO Match test: FLAG_PHONE_asC INFO Match test: NAME_HOUSING_TYPE_asD INFO Match test: CNT_CHILDREN_asC INFO Match test: NAME_FAMILY_STATUS_asD INFO Match test: FLAG_OWN_REALTY_asD INFO Match test: DAYS_EMPLOYED_asC INFO Match test: OCCUPATION_TYPE_asD INFO Match test: FLAG_OWN_CAR_asD INFO Match test: CODE_GENDER_asD INFO Match test: DAYS_BIRTH_asC INFO Match test: CNT_FAM_MEMBERS_asC INFO Match test: NAME_INCOME_TYPE_asD INFO Match test: FLAG_EMAIL_asC INFO Match test: Age_asC INFO Match test: NAME_EDUCATION_TYPE_asD INFO Match test: AMT_INCOME_TOTAL_asC INFO Creating test data... INFO Creating test corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating test cross table... INFO Creating test all desc tables... INFO Match train test: FLAG_OWN_REALTY_asD INFO Match train test: NAME_FAMILY_STATUS_asD INFO Match train test: DAYS_EMPLOYED_asC INFO Match train test: NAME_INCOME_TYPE_asD INFO Match train test: NAME_HOUSING_TYPE_asD INFO Match train test: NAME_EDUCATION_TYPE_asD INFO Match train test: CODE_GENDER_asD INFO Match train test: OCCUPATION_TYPE_asD INFO Match train test: AMT_INCOME_TOTAL_asC INFO Match train test: FLAG_WORK_PHONE_asC INFO Match train test: FLAG_OWN_CAR_asD INFO Match train test: FLAG_PHONE_asC INFO Match train test: DAYS_BIRTH_asC INFO Match train test: CNT_CHILDREN_asC INFO Match train test: Age_asC INFO Match train test: FLAG_EMAIL_asC INFO Match train test: CNT_FAM_MEMBERS_asC INFO Saving info... INFO Saving desc... INFO Saving cross... INFO Saving unique... INFO Saving summary... INFO Saving corr summary... INFO Saving stable_summary... INFO Saving train_stable_iv_ks... INFO Saving train_stable_psi... INFO Saving train_stable_quantile... INFO Saving test_stable_iv_ks... INFO Saving test_stable_psi... INFO Saving test_stable_quantile... INFO Saving stable_cross_psi... 
INFO Saving detail... INFO Saving draft... INFO Saving drop... INFO Saving json... INFO Saving setting... INFO Adding runtime to excel... INFO Saving directory... INFO Saving... INFO Total: 7.13 s INFO Total: 7.18 s
# Inspect the in-memory results kept in at.dt.__data__; the output below shows
# a dict with 'train_data' and 'test_data' DataFrames.
at.dt.__data__
{'train_data': WOE_AMT_INCOME_TOTAL_asC WOE_Age_asC WOE_NAME_EDUCATION_TYPE_asD \
0 0.054842 0.010437 0.057481
1 0.054842 -0.010596 -0.062703
2 0.054842 0.010437 -0.062703
3 0.054842 0.010437 -0.344011
4 0.011731 0.010437 0.057481
... ... ... ...
25514 -0.039322 -0.010596 -0.062703
25515 0.054842 -0.010596 0.057481
25516 -0.039322 -0.010596 0.057481
25517 0.011731 0.010437 -0.062703
25518 0.054842 0.010437 0.057481
WOE_FLAG_EMAIL_asC WOE_NAME_INCOME_TYPE_asD WOE_DAYS_BIRTH_asC \
0 -0.003220 -0.008825 0.021147
1 -0.003220 0.089079 -0.020711
2 -0.003220 -0.008825 0.021147
3 0.033655 -0.008825 0.021147
4 -0.003220 -0.008825 0.021147
... ... ... ...
25514 -0.003220 0.089079 -0.020711
25515 -0.003220 0.089079 -0.020711
25516 -0.003220 -0.251283 -0.020711
25517 -0.003220 0.089079 0.021147
25518 -0.003220 -0.008825 0.021147
WOE_CNT_FAM_MEMBERS_asC WOE_CODE_GENDER_asD WOE_DAYS_EMPLOYED_asC \
0 0.003248 0.071637 0.217895
1 -0.008668 -0.133011 0.217895
2 -0.008668 -0.133011 0.217895
3 -0.008668 0.071637 0.019934
4 0.003248 -0.133011 0.019934
... ... ... ...
25514 0.003248 0.071637 0.217895
25515 0.003248 0.071637 0.019934
25516 0.003248 0.071637 -0.196770
25517 -0.008668 -0.133011 0.019934
25518 0.003248 0.071637 -0.196770
WOE_FLAG_OWN_CAR_asD ... OCCUPATION_TYPE_asD \
0 -0.023932 ... Laborers
1 0.040442 ... Managers
2 0.040442 ... missing
3 -0.023932 ... missing
4 -0.023932 ... Laborers
... ... ... ...
25514 -0.023932 ... Laborers
25515 -0.023932 ... missing
25516 -0.023932 ... missing
25517 0.040442 ... High skill tech staff
25518 0.040442 ... Managers
NAME_FAMILY_STATUS_asD FLAG_OWN_REALTY_asD CNT_CHILDREN_asC \
0 Single / not married Y (-inf, 0.0]
1 Married Y (0.0, inf]
2 Married Y (0.0, inf]
3 Married Y (0.0, inf]
4 Married N (-inf, 0.0]
... ... ... ...
25514 Single / not married Y (-inf, 0.0]
25515 Married N (-inf, 0.0]
25516 Married Y (-inf, 0.0]
25517 Married N (0.0, inf]
25518 Married N (-inf, 0.0]
FLAG_PHONE_asC NAME_HOUSING_TYPE_asD FLAG_WORK_PHONE_asC target \
0 (-inf, 0.0] Rented apartment (-inf, 0.0] 0.0
1 (-inf, 0.0] Office apartment (-inf, 0.0] 0.0
2 (-inf, 0.0] House / apartment (-inf, 0.0] 0.0
3 (-inf, 0.0] House / apartment (-inf, 0.0] 0.0
4 (0.0, 2.0] House / apartment (0.0, 2.0] 0.0
... ... ... ... ...
25514 (-inf, 0.0] House / apartment (-inf, 0.0] 0.0
25515 (-inf, 0.0] House / apartment (-inf, 0.0] 0.0
25516 (-inf, 0.0] House / apartment (-inf, 0.0] 2.0
25517 (-inf, 0.0] With parents (-inf, 0.0] 0.0
25518 (-inf, 0.0] House / apartment (-inf, 0.0] 2.0
ID birth_year
0 5008901 19790101
1 5010527 19710101
2 5047865 19870101
3 5062550 19880101
4 5143056 19830101
... ... ...
25514 5094888 19630101
25515 5037291 19600101
25516 5046225 19590101
25517 5045548 19920101
25518 5115657 19810101
[25519 rows x 37 columns],
'test_data': WOE_AMT_INCOME_TOTAL_asC WOE_NAME_EDUCATION_TYPE_asD WOE_Age_asC \
0 0.011731 0.057481 -0.010596
1 0.054842 -0.062703 -0.010596
2 0.054842 0.057481 -0.010596
3 0.011731 0.057481 -0.010596
4 -0.039322 0.057481 -0.010596
... ... ... ...
10933 0.011731 0.057481 -0.010596
10934 -0.039322 -0.344011 0.010437
10935 -0.039322 0.057481 -0.010596
10936 0.011731 0.057481 -0.010596
10937 0.011731 -0.062703 -0.010596
WOE_FLAG_EMAIL_asC WOE_NAME_INCOME_TYPE_asD WOE_CNT_FAM_MEMBERS_asC \
0 -0.00322 0.089079 -0.008668
1 -0.00322 -0.251283 0.003248
2 -0.00322 -0.008825 -0.008668
3 -0.00322 -0.251283 0.003248
4 -0.00322 0.089079 0.003248
... ... ... ...
10933 -0.00322 0.089079 0.003248
10934 -0.00322 0.089079 -0.008668
10935 -0.00322 -0.251283 0.003248
10936 -0.00322 0.089079 0.003248
10937 -0.00322 -0.251283 0.003248
WOE_DAYS_BIRTH_asC WOE_CODE_GENDER_asD WOE_FLAG_OWN_CAR_asD \
0 -0.020711 0.071637 -0.023932
1 -0.020711 -0.133011 0.040442
2 -0.020711 -0.133011 0.040442
3 -0.020711 0.071637 -0.023932
4 -0.020711 0.071637 -0.023932
... ... ... ...
10933 -0.020711 -0.133011 0.040442
10934 0.021147 0.071637 0.040442
10935 -0.020711 0.071637 -0.023932
10936 -0.020711 0.071637 0.040442
10937 -0.020711 -0.133011 0.040442
WOE_DAYS_EMPLOYED_asC ... OCCUPATION_TYPE_asD FLAG_OWN_REALTY_asD \
0 0.217895 ... Medicine staff N
1 -0.196770 ... missing Y
2 -0.196770 ... missing Y
3 -0.196770 ... missing Y
4 0.019934 ... Sales staff Y
... ... ... ... ...
10933 0.019934 ... Drivers Y
10934 -0.196770 ... Core staff Y
10935 -0.196770 ... missing N
10936 0.217895 ... Laborers N
10937 -0.196770 ... missing Y
NAME_FAMILY_STATUS_asD CNT_CHILDREN_asC NAME_HOUSING_TYPE_asD \
0 Married (0.0, inf] House / apartment
1 Single / not married (-inf, 0.0] House / apartment
2 Married (0.0, inf] House / apartment
3 Married (-inf, 0.0] House / apartment
4 Civil marriage (-inf, 0.0] House / apartment
... ... ... ...
10933 Civil marriage (-inf, 0.0] House / apartment
10934 Married (0.0, inf] House / apartment
10935 Single / not married (-inf, 0.0] House / apartment
10936 Married (-inf, 0.0] House / apartment
10937 Separated (-inf, 0.0] House / apartment
FLAG_WORK_PHONE_asC FLAG_PHONE_asC target ID birth_year
0 (-inf, 0.0] (-inf, 0.0] 0.0 5142144 19750101
1 (-inf, 0.0] (-inf, 0.0] 0.0 5009990 19590101
2 (-inf, 0.0] (-inf, 0.0] 2.0 5024737 19760101
3 (-inf, 0.0] (-inf, 0.0] 0.0 5091905 19660101
4 (-inf, 0.0] (-inf, 0.0] 0.0 5135622 19780101
... ... ... ... ... ...
10933 (0.0, 2.0] (0.0, 2.0] 0.0 5029518 19700101
10934 (0.0, 2.0] (0.0, 2.0] 0.0 5113288 19900101
10935 (-inf, 0.0] (0.0, 2.0] 0.0 5136905 19580101
10936 (0.0, 2.0] (0.0, 2.0] 0.0 5089537 19710101
10937 (-inf, 0.0] (-inf, 0.0] 0.0 5058476 19650101
[10938 rows x 37 columns]}at.Analysis.data_flow(
df_train, "./数据分析demo/v12-ori/v12-ori.xlsx", test_data=df_test, response="target", # 基础定义
split_col_name="birth_year", use_train_time=True, # 时间切片
add_info="ID", # 主键添加
enable_single_threshold=False, # 关闭单一值过滤
# enable_iv_limit=True, iv_threshold=[0.01, np.inf], # 暂不启用iv过滤
customized_groups={
"OCCUPATION_TYPE": [
"Cleaning staff|Cooking staff|Drivers|Laborers|Low-skill Laborers|Security staff|Waiters/barmen staff",
"Accountants|Core staff|HR staff|Medicine staff|Private service staff|Realty agents|Sales staff|Secretaries",
"Managers|High skill tech staff|IT staff",
],
"CNT_CHILDREN": [-np.inf, 0, np.inf],
},
exclude_column=["ID", "weight"], # 排除权重
min_group_percent=0,
save_ori=True, replace_option={"num_replace": {0: -999}}
)
INFO Prepare kwargs... INFO Checking train_data... INFO Checking response... INFO Checking test_data... INFO Checking response... INFO Checking columns... INFO Checking dtypes... WARNING Setting format_time_edge to True ! INFO Prepare run... INFO Run: FLAG_MOBIL INFO Run: FLAG_PHONE INFO Run: FLAG_WORK_PHONE INFO Run: CNT_CHILDREN INFO Run: NAME_HOUSING_TYPE INFO Run: NAME_FAMILY_STATUS INFO Run: FLAG_OWN_REALTY INFO Run: DAYS_EMPLOYED INFO Run: OCCUPATION_TYPE INFO Run: FLAG_OWN_CAR INFO Run: CODE_GENDER INFO Run: DAYS_BIRTH INFO Run: CNT_FAM_MEMBERS INFO Run: NAME_INCOME_TYPE INFO Run: FLAG_EMAIL INFO Run: Age INFO Run: NAME_EDUCATION_TYPE INFO Run: AMT_INCOME_TOTAL INFO Selecting... INFO Match train: FLAG_PHONE_asC INFO Match train: CNT_CHILDREN_asC INFO Match train: FLAG_WORK_PHONE_asC INFO Match train: NAME_HOUSING_TYPE_asD INFO Match train: FLAG_OWN_REALTY_asD INFO Match train: NAME_FAMILY_STATUS_asD INFO Match train: OCCUPATION_TYPE_asD INFO Match train: DAYS_EMPLOYED_asC INFO Match train: FLAG_OWN_CAR_asD INFO Match train: CODE_GENDER_asD INFO Match train: CNT_FAM_MEMBERS_asC INFO Match train: DAYS_BIRTH_asC INFO Match train: FLAG_EMAIL_asC INFO Match train: NAME_INCOME_TYPE_asD INFO Match train: NAME_EDUCATION_TYPE_asD INFO Match train: Age_asC INFO Match train: AMT_INCOME_TOTAL_asC INFO Creating train data... INFO Saving train data... INFO Creating train corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating train cross table... INFO Creating train all desc tables... 
INFO Match test: FLAG_PHONE_asC INFO Match test: FLAG_WORK_PHONE_asC INFO Match test: NAME_HOUSING_TYPE_asD INFO Match test: CNT_CHILDREN_asC INFO Match test: NAME_FAMILY_STATUS_asD INFO Match test: FLAG_OWN_REALTY_asD INFO Match test: DAYS_EMPLOYED_asC INFO Match test: OCCUPATION_TYPE_asD INFO Match test: FLAG_OWN_CAR_asD INFO Match test: CODE_GENDER_asD INFO Match test: DAYS_BIRTH_asC INFO Match test: CNT_FAM_MEMBERS_asC INFO Match test: NAME_INCOME_TYPE_asD INFO Match test: FLAG_EMAIL_asC INFO Match test: Age_asC INFO Match test: NAME_EDUCATION_TYPE_asD INFO Match test: AMT_INCOME_TOTAL_asC INFO Creating test data... INFO Saving test data... INFO Creating test corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating test cross table... INFO Creating test all desc tables... INFO Match train test: FLAG_OWN_REALTY_asD INFO Match train test: NAME_FAMILY_STATUS_asD INFO Match train test: DAYS_EMPLOYED_asC INFO Match train test: NAME_INCOME_TYPE_asD INFO Match train test: NAME_HOUSING_TYPE_asD INFO Match train test: NAME_EDUCATION_TYPE_asD INFO Match train test: CODE_GENDER_asD INFO Match train test: OCCUPATION_TYPE_asD INFO Match train test: AMT_INCOME_TOTAL_asC INFO Match train test: FLAG_WORK_PHONE_asC INFO Match train test: FLAG_OWN_CAR_asD INFO Match train test: FLAG_PHONE_asC INFO Match train test: DAYS_BIRTH_asC INFO Match train test: CNT_CHILDREN_asC INFO Match train test: Age_asC INFO Match train test: FLAG_EMAIL_asC INFO Match train test: CNT_FAM_MEMBERS_asC INFO Saving info... INFO Saving desc... INFO Saving cross... INFO Saving unique... INFO Saving summary... INFO Saving corr summary... INFO Saving stable_summary... INFO Saving train_stable_iv_ks... INFO Saving train_stable_psi... INFO Saving train_stable_quantile... INFO Saving test_stable_iv_ks... INFO Saving test_stable_psi... INFO Saving test_stable_quantile... 
INFO Saving stable_cross_psi... INFO Saving detail... INFO Saving draft... INFO Saving drop... INFO Saving json... INFO Saving setting... INFO Adding runtime to excel... INFO Saving directory... INFO Saving... INFO Total: 7.16 s INFO Total: 7.20 s
# Pack the v12-ori output directory into v12-ori.zip; the log below shows an
# existing archive being removed first and then each file being saved.
at.dt.Api.zip_dir_files("./数据分析demo/v12-ori.zip", "./数据分析demo/v12-ori")
INFO Removing ./数据分析demo/v12-ori.zip... INFO Saving ./数据分析demo/v12-ori/TEST_DATA_ORI_v12-ori.csv... INFO Saving ./数据分析demo/v12-ori/TRAIN_DATA_ORI_v12-ori.csv... INFO Saving ./数据分析demo/v12-ori/Data_v12-ori.xlsx... INFO Saving ./数据分析demo/v12-ori/Data_v12-ori.json... INFO Saving ./数据分析demo/v12-ori/Data_v12-ori_data.pkl...
# Pack the v12 output directory into v12.zip; the log below shows an existing
# archive being removed first and then each file being saved.
at.dt.Api.zip_dir_files("./数据分析demo/v12.zip", "./数据分析demo/v12")
INFO Removing ./数据分析demo/v12.zip... INFO Saving ./数据分析demo/v12/TEST_DATA_WOE_v12.csv... INFO Saving ./数据分析demo/v12/Data_v12_data.pkl... INFO Saving ./数据分析demo/v12/Data_v12.xlsx... INFO Saving ./数据分析demo/v12/Data_v12.json... INFO Saving ./数据分析demo/v12/TRAIN_DATA_WOE_v12.csv...
# Pack the v12-raw output directory into v12-raw.zip; the log below shows an
# existing archive being removed first and then each file being saved
# (including the TRAIN/TEST RAW_DATA CSVs).
at.dt.Api.zip_dir_files("./数据分析demo/v12-raw.zip", "./数据分析demo/v12-raw")
INFO Removing ./数据分析demo/v12-raw.zip... INFO Saving ./数据分析demo/v12-raw/Data_v12-raw.json... INFO Saving ./数据分析demo/v12-raw/TRAIN_DATA_WOE_v12-raw.csv... INFO Saving ./数据分析demo/v12-raw/Data_v12-raw.xlsx... INFO Saving ./数据分析demo/v12-raw/TEST_RAW_DATA_v12-raw.csv... INFO Saving ./数据分析demo/v12-raw/TRAIN_RAW_DATA_v12-raw.csv... INFO Saving ./数据分析demo/v12-raw/TEST_DATA_WOE_v12-raw.csv... INFO Saving ./数据分析demo/v12-raw/Data_v12-raw_data.pkl...
# Pack the v12-return output directory into v12-return.zip; the log below lists
# only the pkl, xlsx and json files (no data CSVs) for this run.
at.dt.Api.zip_dir_files("./数据分析demo/v12-return.zip", "./数据分析demo/v12-return")
INFO Removing ./数据分析demo/v12-return.zip... INFO Saving ./数据分析demo/v12-return/Data_v12-return_data.pkl... INFO Saving ./数据分析demo/v12-return/Data_v12-return.xlsx... INFO Saving ./数据分析demo/v12-return/Data_v12-return.json...
# Load the pickled artefact of the v12 run; the output below shows a dict of
# per-variable bin edges ("group_*"), WOE maps ("WOE_*"), a "var_dict" mapping
# and a "__version__" timestamp.
pd.read_pickle("./数据分析demo/v12/Data_v12_data.pkl")
{'_idx_range_FLAG_PHONE_asC': ['(-inf, 0.0]', '(0.0, 2.0]'],
'group_FLAG_PHONE_asC': [-inf, 0, 2],
'_draft_group_FLAG_PHONE_asC': [-inf, 0, inf],
'_idx_range_FLAG_WORK_PHONE_asC': ['(-inf, 0.0]', '(0.0, 2.0]'],
'_cap_floor_FLAG_PHONE_asC': None,
'group_FLAG_WORK_PHONE_asC': [-inf, 0, 2],
'_draft_group_FLAG_WORK_PHONE_asC': [-inf, 0, inf],
'_cap_floor_FLAG_WORK_PHONE_asC': None,
'WOE_FLAG_WORK_PHONE_asC': {'(-inf, 0.0]': 0.018080441,
'(0.0, 2.0]': -0.05985639},
'WOE_FLAG_PHONE_asC': {'(-inf, 0.0]': -0.01889305, '(0.0, 2.0]': 0.046835206},
'_idx_range_CNT_CHILDREN_asC': ['(-inf, 0.0]', '(0.0, inf]'],
'group_CNT_CHILDREN_asC': [-inf, 0, inf],
'_cap_floor_CNT_CHILDREN_asC': None,
'_idx_range_NAME_HOUSING_TYPE_asD': ['Municipal apartment|Co-op apartment|Office apartment',
'Rented apartment|With parents|House / apartment'],
'_draft_group_NAME_HOUSING_TYPE_asD': [],
'WOE_CNT_CHILDREN_asC': {'(-inf, 0.0]': -0.007400325,
'(0.0, inf]': 0.016927276},
'WOE_NAME_HOUSING_TYPE_asD': ({'Rented apartment': 0.028387127,
'With parents': 0.028387127,
'House / apartment': 0.028387127,
'Municipal apartment': -0.48593503,
'Co-op apartment': -0.48593503,
'Office apartment': -0.48593503},
{'Rented apartment': 'Rented apartment|With parents|House / apartment',
'With parents': 'Rented apartment|With parents|House / apartment',
'House / apartment': 'Rented apartment|With parents|House / apartment',
'Municipal apartment': 'Municipal apartment|Co-op apartment|Office apartment',
'Co-op apartment': 'Municipal apartment|Co-op apartment|Office apartment',
'Office apartment': 'Municipal apartment|Co-op apartment|Office apartment',
'missing': 'missing'}),
'_idx_range_FLAG_OWN_REALTY_asD': ['N', 'Y'],
'_draft_group_FLAG_OWN_REALTY_asD': [],
'_idx_range_NAME_FAMILY_STATUS_asD': ['Civil marriage|Married',
'Separated|Single / not married',
'Widow'],
'_draft_group_NAME_FAMILY_STATUS_asD': [],
'WOE_FLAG_OWN_REALTY_asD': ({'Y': 0.15774061, 'N': -0.26626292},
{'Y': 'Y', 'N': 'N', 'missing': 'missing'}),
'WOE_NAME_FAMILY_STATUS_asD': ({'Civil marriage': 0.086759016,
'Married': 0.086759016,
'Separated': -0.10938848,
'Single / not married': -0.10938848,
'Widow': -0.68922174},
{'Civil marriage': 'Civil marriage|Married',
'Married': 'Civil marriage|Married',
'Separated': 'Separated|Single / not married',
'Single / not married': 'Separated|Single / not married',
'Widow': 'Widow',
'missing': 'missing'}),
'_idx_range_OCCUPATION_TYPE_asD': ['Cleaning staff|Cooking staff|Drivers|Laborers|Low-skill Laborers|Security staff|Waiters/barmen staff',
'Accountants|Core staff|HR staff|Medicine staff|Private service staff|Realty agents|Sales staff|Secretaries',
'Managers|High skill tech staff|IT staff',
'missing'],
'_idx_range_DAYS_EMPLOYED_asC': ['(-15662, -2501]',
'(-2501, -746]',
'(-746, 365244]'],
'group_DAYS_EMPLOYED_asC': [-15662, -2501, -746, 365244],
'_draft_group_DAYS_EMPLOYED_asC': [-inf,
-5329,
-3592,
-2744,
-2076,
-1539,
-1051,
-596,
-183,
inf],
'_cap_floor_DAYS_EMPLOYED_asC': None,
'WOE_OCCUPATION_TYPE_asD': ({'Accountants': 0.07481758,
'Core staff': 0.07481758,
'HR staff': 0.07481758,
'Medicine staff': 0.07481758,
'Private service staff': 0.07481758,
'Realty agents': 0.07481758,
'Sales staff': 0.07481758,
'Secretaries': 0.07481758,
'missing': -0.014318004,
'Cleaning staff': -0.018571466,
'Cooking staff': -0.018571466,
'Drivers': -0.018571466,
'Laborers': -0.018571466,
'Low-skill Laborers': -0.018571466,
'Security staff': -0.018571466,
'Waiters/barmen staff': -0.018571466,
'Managers': -0.07952402,
'High skill tech staff': -0.07952402,
'IT staff': -0.07952402},
{'Accountants': 'Accountants|Core staff|HR staff|Medicine staff|Private service staff|Realty agents|Sales staff|Secretaries',
'Core staff': 'Accountants|Core staff|HR staff|Medicine staff|Private service staff|Realty agents|Sales staff|Secretaries',
'HR staff': 'Accountants|Core staff|HR staff|Medicine staff|Private service staff|Realty agents|Sales staff|Secretaries',
'Medicine staff': 'Accountants|Core staff|HR staff|Medicine staff|Private service staff|Realty agents|Sales staff|Secretaries',
'Private service staff': 'Accountants|Core staff|HR staff|Medicine staff|Private service staff|Realty agents|Sales staff|Secretaries',
'Realty agents': 'Accountants|Core staff|HR staff|Medicine staff|Private service staff|Realty agents|Sales staff|Secretaries',
'Sales staff': 'Accountants|Core staff|HR staff|Medicine staff|Private service staff|Realty agents|Sales staff|Secretaries',
'Secretaries': 'Accountants|Core staff|HR staff|Medicine staff|Private service staff|Realty agents|Sales staff|Secretaries',
'missing': 'missing',
'Cleaning staff': 'Cleaning staff|Cooking staff|Drivers|Laborers|Low-skill Laborers|Security staff|Waiters/barmen staff',
'Cooking staff': 'Cleaning staff|Cooking staff|Drivers|Laborers|Low-skill Laborers|Security staff|Waiters/barmen staff',
'Drivers': 'Cleaning staff|Cooking staff|Drivers|Laborers|Low-skill Laborers|Security staff|Waiters/barmen staff',
'Laborers': 'Cleaning staff|Cooking staff|Drivers|Laborers|Low-skill Laborers|Security staff|Waiters/barmen staff',
'Low-skill Laborers': 'Cleaning staff|Cooking staff|Drivers|Laborers|Low-skill Laborers|Security staff|Waiters/barmen staff',
'Security staff': 'Cleaning staff|Cooking staff|Drivers|Laborers|Low-skill Laborers|Security staff|Waiters/barmen staff',
'Waiters/barmen staff': 'Cleaning staff|Cooking staff|Drivers|Laborers|Low-skill Laborers|Security staff|Waiters/barmen staff',
'Managers': 'Managers|High skill tech staff|IT staff',
'High skill tech staff': 'Managers|High skill tech staff|IT staff',
'IT staff': 'Managers|High skill tech staff|IT staff'}),
'_idx_range_FLAG_OWN_CAR_asD': ['N', 'Y'],
'_draft_group_FLAG_OWN_CAR_asD': [],
'WOE_DAYS_EMPLOYED_asC': {'(-15662, -2501]': 0.21789497,
'(-2501, -746]': 0.019934393,
'(-746, 365244]': -0.19676958},
'WOE_FLAG_OWN_CAR_asD': ({'Y': 0.04044154, 'N': -0.023931751},
{'Y': 'Y', 'N': 'N', 'missing': 'missing'}),
'_idx_range_CODE_GENDER_asD': ['F', 'M'],
'_draft_group_CODE_GENDER_asD': [],
'WOE_CODE_GENDER_asD': ({'F': 0.071636915, 'M': -0.13301057},
{'F': 'F', 'M': 'M', 'missing': 'missing'}),
'_idx_range_CNT_FAM_MEMBERS_asC': ['(0.0, 2.0]', '(2.0, 21.0]'],
'group_CNT_FAM_MEMBERS_asC': [0.0, 2.0, 21.0],
'_draft_group_CNT_FAM_MEMBERS_asC': [-inf, 1.0, 2.0, 3.0, inf],
'_cap_floor_CNT_FAM_MEMBERS_asC': None,
'_idx_range_DAYS_BIRTH_asC': ['(-25153, -15602]', '(-15602, -7488]'],
'group_DAYS_BIRTH_asC': [-25153, -15602, -7488],
'_draft_group_DAYS_BIRTH_asC': [-inf,
-22014,
-20337,
-18650,
-17051,
-15595,
-14400,
-13144,
-11841,
-10482,
inf],
'_cap_floor_DAYS_BIRTH_asC': None,
'WOE_CNT_FAM_MEMBERS_asC': {'(0.0, 2.0]': 0.0032483533,
'(2.0, 21.0]': -0.008667862},
'WOE_DAYS_BIRTH_asC': {'(-25153, -15602]': -0.020711053,
'(-15602, -7488]': 0.02114719},
'_idx_range_FLAG_EMAIL_asC': ['(-inf, 0.0]', '(0.0, 2.0]'],
'_idx_range_NAME_INCOME_TYPE_asD': ['Commercial associate',
'Pensioner',
'Student|State servant|Working'],
'_draft_group_NAME_INCOME_TYPE_asD': [],
'group_FLAG_EMAIL_asC': [-inf, 0, 2],
'_draft_group_FLAG_EMAIL_asC': [-inf, 0, inf],
'_cap_floor_FLAG_EMAIL_asC': None,
'WOE_NAME_INCOME_TYPE_asD': ({'Student': 0.08907906,
'State servant': 0.08907906,
'Working': 0.08907906,
'Commercial associate': -0.008824969,
'Pensioner': -0.25128338},
{'Student': 'Student|State servant|Working',
'State servant': 'Student|State servant|Working',
'Working': 'Student|State servant|Working',
'Commercial associate': 'Commercial associate',
'Pensioner': 'Pensioner',
'missing': 'missing'}),
'WOE_FLAG_EMAIL_asC': {'(-inf, 0.0]': -0.0032195034,
'(0.0, 2.0]': 0.03365548},
'_idx_range_NAME_EDUCATION_TYPE_asD': ['Academic degree|Secondary / secondary special',
'Higher education',
'Incomplete higher|Lower secondary'],
'_draft_group_NAME_EDUCATION_TYPE_asD': [],
'_idx_range_Age_asC': ['(19, 42]', '(42, 69]'],
'group_Age_asC': [19, 42, 69],
'_draft_group_Age_asC': [-inf, 28, 32, 36, 40, 44, 49, 54, 59, 66, inf],
'_cap_floor_Age_asC': None,
'WOE_NAME_EDUCATION_TYPE_asD': ({'Academic degree': 0.057481077,
'Secondary / secondary special': 0.057481077,
'Higher education': -0.06270337,
'Incomplete higher': -0.34401053,
'Lower secondary': -0.34401053},
{'Academic degree': 'Academic degree|Secondary / secondary special',
'Secondary / secondary special': 'Academic degree|Secondary / secondary special',
'Higher education': 'Higher education',
'Incomplete higher': 'Incomplete higher|Lower secondary',
'Lower secondary': 'Incomplete higher|Lower secondary',
'missing': 'missing'}),
'WOE_Age_asC': {'(19, 42]': 0.010436707, '(42, 69]': -0.010596097},
'_idx_range_AMT_INCOME_TOTAL_asC': ['(26999.0, 135000.0]',
'(135000.0, 225000.0]',
'(225000.0, 1575001.0]'],
'group_AMT_INCOME_TOTAL_asC': [26999.0, 135000.0, 225000.0, 1575001.0],
'_draft_group_AMT_INCOME_TOTAL_asC': [-inf,
90000.0,
112500.0,
135000.0,
157500.0,
180000.0,
225000.0,
292500.0,
675000.0,
inf],
'_cap_floor_AMT_INCOME_TOTAL_asC': None,
'WOE_AMT_INCOME_TOTAL_asC': {'(26999.0, 135000.0]': -0.039321624,
'(135000.0, 225000.0]': 0.011731351,
'(225000.0, 1575001.0]': 0.054842446},
'var_dict': {'AMT_INCOME_TOTAL': 'AMT_INCOME_TOTAL_asC',
'Age': 'Age_asC',
'NAME_EDUCATION_TYPE': 'NAME_EDUCATION_TYPE_asD',
'FLAG_EMAIL': 'FLAG_EMAIL_asC',
'NAME_INCOME_TYPE': 'NAME_INCOME_TYPE_asD',
'DAYS_BIRTH': 'DAYS_BIRTH_asC',
'CNT_FAM_MEMBERS': 'CNT_FAM_MEMBERS_asC',
'CODE_GENDER': 'CODE_GENDER_asD',
'FLAG_OWN_CAR': 'FLAG_OWN_CAR_asD',
'DAYS_EMPLOYED': 'DAYS_EMPLOYED_asC',
'OCCUPATION_TYPE': 'OCCUPATION_TYPE_asD',
'NAME_FAMILY_STATUS': 'NAME_FAMILY_STATUS_asD',
'FLAG_OWN_REALTY': 'FLAG_OWN_REALTY_asD',
'NAME_HOUSING_TYPE': 'NAME_HOUSING_TYPE_asD',
'CNT_CHILDREN': 'CNT_CHILDREN_asC',
'FLAG_PHONE': 'FLAG_PHONE_asC',
'FLAG_WORK_PHONE': 'FLAG_WORK_PHONE_asC'},
'__version__': 'Sat Jan 22 21:51:31 2022'}
at.dt.Api.read_json_file("./数据分析demo/v12/Data_v12.json")
{'add_cap_info': False,
'add_cum_info': False,
'add_cum_left': False,
'add_cum_odds': False,
'add_excel_table_dir': True,
'add_gini_lift': False,
'add_ind_ks': False,
'add_info': 'ID',
'add_unique_info': True,
'all_inf_replace': 1,
'auto_convert_dtype': True,
'auto_discrete_max': 50,
'auto_discrete_min': 3,
'auto_formatting': True,
'auto_merge_discrete': True,
'auto_nan_discrete': None,
'bad': 1,
'best_path_generate': 'tree',
'best_search_depth': 3,
'capping_flooring_limit': [0.01, 0.99],
'check_ind_monotonicity': True,
'check_monotonicity': True,
'check_separate_nearby': False,
'check_separate_value': False,
'check_test_monotonic': False,
'chunk_size': 1,
'continuous_discrete_limit': 1,
'corr_table_method': 'pearson',
'cross_response': None,
'cross_split_detail': False,
'cross_split_summary': True,
'customized_groups': {'AMT_INCOME_TOTAL': [26999.0,
135000.0,
225000.0,
1575001.0],
'Age': [19, 42, 69],
'CNT_CHILDREN': [-inf, 0, inf],
'CNT_FAM_MEMBERS': [0.0, 2.0, 21.0],
'CODE_GENDER': ['F', 'M', 'missing'],
'DAYS_BIRTH': [-25153, -15602, -7488],
'DAYS_EMPLOYED': [-15662, -2501, -746, 365244],
'FLAG_EMAIL': [-inf, 0, 2],
'FLAG_OWN_CAR': ['N', 'Y', 'missing'],
'FLAG_OWN_REALTY': ['N', 'Y', 'missing'],
'FLAG_PHONE': [-inf, 0, 2],
'FLAG_WORK_PHONE': [-inf, 0, 2],
'NAME_EDUCATION_TYPE': ['Academic degree|Secondary / secondary special',
'Higher education',
'Incomplete higher|Lower secondary',
'missing'],
'NAME_FAMILY_STATUS': ['Civil marriage|Married',
'Separated|Single / not married',
'Widow',
'missing'],
'NAME_HOUSING_TYPE': ['Municipal apartment|Co-op apartment|Office apartment',
'Rented apartment|With parents|House / apartment',
'missing'],
'NAME_INCOME_TYPE': ['Commercial associate',
'Pensioner',
'Student|State servant|Working',
'missing'],
'OCCUPATION_TYPE': ['Accountants|Core staff|HR staff|Medicine staff|Private service staff|Realty agents|Sales staff|Secretaries',
'Cleaning staff|Cooking staff|Drivers|Laborers|Low-skill Laborers|Security staff|Waiters/barmen staff',
'Managers|High skill tech staff|IT staff',
'missing']},
'cut_limit_list': [20, 5, 3],
'cut_method': 'cumsum',
'data_bar_cols': ['rate', 'woe', 'corr'],
'deal_unused_input': 'error',
'discrete_gap_percent': True,
'discrete_group_num': True,
'discrete_group_percent': True,
'discrete_sort_method': 'woe',
'draft_groups': {'AMT_INCOME_TOTAL_asC': [-inf,
90000.0,
112500.0,
135000.0,
157500.0,
180000.0,
225000.0,
292500.0,
675000.0,
inf],
'Age_asC': [-inf, 28, 32, 36, 40, 44, 49, 54, 59, 66, inf],
'CNT_FAM_MEMBERS_asC': [-inf, 1.0, 2.0, 3.0, inf],
'CODE_GENDER_asD': [],
'DAYS_BIRTH_asC': [-inf,
-22014,
-20337,
-18650,
-17051,
-15595,
-14400,
-13144,
-11841,
-10482,
inf],
'DAYS_EMPLOYED_asC': [-inf,
-5329,
-3592,
-2744,
-2076,
-1539,
-1051,
-596,
-183,
inf],
'FLAG_EMAIL_asC': [-inf, 0, inf],
'FLAG_OWN_CAR_asD': [],
'FLAG_OWN_REALTY_asD': [],
'FLAG_PHONE_asC': [-inf, 0, inf],
'FLAG_WORK_PHONE_asC': [-inf, 0, inf],
'NAME_EDUCATION_TYPE_asD': [],
'NAME_FAMILY_STATUS_asD': [],
'NAME_HOUSING_TYPE_asD': [],
'NAME_INCOME_TYPE_asD': []},
'enable_capping_flooring': False,
'enable_iv_limit': False,
'enable_iv_rank': True,
'enable_kmeans_mini': True,
'enable_lock': True,
'enable_mini_tol_value': False,
'enable_multi': True,
'enable_optimal_woe': True,
'enable_single_threshold': False,
'enable_supervised_cumsum': True,
'eval_cross_stable': False,
'eval_test_stable': False,
'eval_train_stable': False,
'exclude_column': ['ID', 'weight'],
'fill_inf_nan': True,
'fill_woe_type': 'avg_all',
'find_step_mode': False,
'find_step_num': 3,
'flow_data_type': 'WOE_',
'format_header': True,
'format_time_edge': False,
'get_cut_by': ['len', 'ks', 'iv'],
'good': 0,
'group_include_none': True,
'hidden_cols': ['var_name', 'var_new'],
'hidden_tables': None,
'include_right': True,
'ind': 2,
'inf_gap_percent': 0.3,
'is_time_series': True,
'iv_gap_ratio_test': 0.5,
'iv_gap_ratio_train': 0.5,
'iv_gap_train_test': 0.2,
'iv_threshold': [0.02, inf],
'keep_inf_iv': False,
'keep_iv_num': 2000,
'keep_separate_value': None,
'kmeans_auto_omp_num_threads': True,
'kmeans_init_method': 'k-means++',
'kmeans_init_number': 6,
'kmeans_max_iterint': 200,
'kmeans_mini_init_number': None,
'kmeans_mini_number': 100,
'kmeans_mno_improvement': 10,
'kmeans_random_state': 0,
'kmeans_reassignment_ratio': 0.01,
'kmeans_tolerance_value': 0.0001,
'ks_gap_ratio_test': 0.3,
'ks_gap_ratio_train': 0.3,
'ks_gap_train_test': 0.1,
'max_cut_part': 10,
'max_discrete_num': 100,
'max_percent_gap_cross': 0.1,
'max_percent_gap_test': 0.1,
'max_percent_gap_total': 0.1,
'max_percent_gap_train': 0.1,
'max_split_part': 10,
'mean_cross_psi_test': 0.1,
'mean_cross_psi_train': 0.1,
'mean_quantile_variation_test': inf,
'mean_quantile_variation_train': inf,
'merge_discrete_plan': 'best-rank',
'min_discrete_num': 1,
'min_gap_percent': 0.0,
'min_group_num': 2,
'min_group_percent': 0,
'min_nan_percent': 0.05,
'monotonic_cnt_rate_test': 0.5,
'monotonic_cnt_rate_train': 0.5,
'multi_num': 8,
'nan_group': 'missing',
'output_name': './数据分析demo/v12/v12.xlsx',
'overwrite_json': False,
'precision': 3,
'random_cut': False,
'random_method_range': None,
'random_search': False,
'recover_from_json': None,
'recover_from_pkl': False,
'replace_option': None,
'response': 'target',
'sample_weight_name': None,
'save_corr_detail': False,
'save_corr_summary': True,
'save_data_pkl': True,
'save_desc_info': True,
'save_draft_table': True,
'save_json': True,
'save_null_count': False,
'save_one_hot': False,
'save_or_return': True,
'save_ori': False,
'save_raw_dataframe': False,
'save_woe_data': True,
'search_cut': True,
'select_test_base': 'min',
'select_train_base': 'min',
'single_include_none': True,
'single_threshold': 0.85,
'slient_performance_warning': True,
'sort_discrete_rank': True,
'split_col_name': 'birth_year',
'split_data_method': 3,
'split_desc_info': False,
'split_random_state': None,
'start_time': 'Sat Jan 22 21:51:31 2022',
'stratified_split': False,
'strict_monotonicity': True,
'supervised_kmeans_cumsum': True,
'supervised_tree_cumsum': False,
'task_type': 'binary',
'text_wrap_trigger': 36,
'train_test_cross_psi': 0.1,
'train_test_dir_cn': None,
'train_test_iv_gap_ratio': 0.5,
'train_test_ks_gap_ratio': 0.3,
'train_test_total_psi': 0.1,
'tree_criterion': 'gini',
'tree_splitter': 'best',
'use_response_split': True,
'use_train_time': True,
'user_define_groups': {'CNT_CHILDREN': [-inf, 0, inf],
'OCCUPATION_TYPE': ['Cleaning staff|Cooking staff|Drivers|Laborers|Low-skill Laborers|Security staff|Waiters/barmen staff',
'Accountants|Core staff|HR staff|Medicine staff|Private service staff|Realty agents|Sales staff|Secretaries',
'Managers|High skill tech staff|IT staff']},
'var_dict_path': None,
'var_explain_name': 'var_explain',
'var_range': ['-inf', 'inf'],
'var_series_name': 'var_name',
'zero_woe_nan': True}at.Analysis.data_flow(
df_train, "./数据分析demo/v12-rec/v12-rec.xlsx", test_data=df_test,
recover_from_json="./数据分析demo/v12/Data_v12.json"
)
INFO Prepare kwargs... INFO Checking train_data... INFO Checking response... INFO Checking test_data... INFO Checking response... INFO Checking columns... INFO Checking dtypes... WARNING Setting format_time_edge to True ! INFO Match train: FLAG_PHONE_asC INFO Match train: FLAG_WORK_PHONE_asC INFO Match train: NAME_HOUSING_TYPE_asD INFO Match train: CNT_CHILDREN_asC INFO Match train: NAME_FAMILY_STATUS_asD INFO Match train: FLAG_OWN_REALTY_asD INFO Match train: DAYS_EMPLOYED_asC INFO Match train: OCCUPATION_TYPE_asD INFO Match train: FLAG_OWN_CAR_asD INFO Match train: CODE_GENDER_asD INFO Match train: DAYS_BIRTH_asC INFO Match train: CNT_FAM_MEMBERS_asC INFO Match train: NAME_INCOME_TYPE_asD INFO Match train: FLAG_EMAIL_asC INFO Match train: Age_asC INFO Match train: NAME_EDUCATION_TYPE_asD INFO Match train: AMT_INCOME_TOTAL_asC INFO Creating train data... INFO Saving train data... INFO Creating train corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating train cross table... INFO Creating train all desc tables... INFO Match test: FLAG_PHONE_asC INFO Match test: FLAG_WORK_PHONE_asC INFO Match test: CNT_CHILDREN_asC INFO Match test: NAME_HOUSING_TYPE_asD INFO Match test: NAME_FAMILY_STATUS_asD INFO Match test: FLAG_OWN_REALTY_asD INFO Match test: DAYS_EMPLOYED_asC INFO Match test: OCCUPATION_TYPE_asD INFO Match test: FLAG_OWN_CAR_asD INFO Match test: CODE_GENDER_asD INFO Match test: DAYS_BIRTH_asC INFO Match test: CNT_FAM_MEMBERS_asC INFO Match test: NAME_INCOME_TYPE_asD INFO Match test: FLAG_EMAIL_asC INFO Match test: Age_asC INFO Match test: NAME_EDUCATION_TYPE_asD INFO Match test: AMT_INCOME_TOTAL_asC INFO Creating test data... INFO Saving test data... INFO Creating test corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... 
INFO Creating test cross table... INFO Creating test all desc tables... INFO Match train test: AMT_INCOME_TOTAL_asC INFO Match train test: NAME_EDUCATION_TYPE_asD INFO Match train test: Age_asC INFO Match train test: FLAG_EMAIL_asC INFO Match train test: NAME_INCOME_TYPE_asD INFO Match train test: CNT_FAM_MEMBERS_asC INFO Match train test: DAYS_BIRTH_asC INFO Match train test: CODE_GENDER_asD INFO Match train test: DAYS_EMPLOYED_asC INFO Match train test: FLAG_OWN_CAR_asD INFO Match train test: OCCUPATION_TYPE_asD INFO Match train test: FLAG_OWN_REALTY_asD INFO Match train test: NAME_FAMILY_STATUS_asD INFO Match train test: CNT_CHILDREN_asC INFO Match train test: NAME_HOUSING_TYPE_asD INFO Match train test: FLAG_WORK_PHONE_asC INFO Match train test: FLAG_PHONE_asC INFO Saving info... INFO Saving desc... INFO Saving cross... INFO Saving unique... INFO Saving summary... INFO Saving corr summary... INFO Saving stable_summary... INFO Saving train_stable_iv_ks... INFO Saving train_stable_psi... INFO Saving train_stable_quantile... INFO Saving test_stable_iv_ks... INFO Saving test_stable_psi... INFO Saving test_stable_quantile... INFO Saving stable_cross_psi... INFO Saving detail... INFO Saving draft... INFO Saving json... INFO Saving setting... INFO Adding runtime to excel... INFO Saving directory... INFO Saving... INFO Total: 6.83 s INFO Total: 6.87 s
# Run the data_flow analysis with df_train as the training set and write the
# results under the v2-more workbook path.
# NOTE(review): train_name / test_names look like display labels for the
# datasets ("开发" = development, "测试" = test, "全量" = full) -- confirm
# against the alpha_tools documentation.
at.Analysis.data_flow(
df_train, "./数据分析demo/v2-more/v2-more.xlsx",
# Pass two datasets in at the same time: test and full.
test_data=[df_test, df_data], train_name="开发", test_names=["测试", "全量"],
response="target", split_col_name="birth_year", use_train_time=True,
)
INFO Prepare kwargs... INFO Checking train_data... INFO Checking response... INFO Checking test_data... INFO Checking response... INFO Checking columns... INFO Checking dtypes... WARNING Setting format_time_edge to True ! INFO Prepare run... INFO Run: FLAG_MOBIL INFO Run: FLAG_PHONE INFO Run: ID INFO Run: FLAG_WORK_PHONE INFO Run: weight INFO Run: NAME_HOUSING_TYPE INFO Run: CNT_CHILDREN INFO Run: NAME_FAMILY_STATUS INFO Run: FLAG_OWN_REALTY INFO Run: DAYS_EMPLOYED INFO Run: OCCUPATION_TYPE INFO Run: FLAG_OWN_CAR INFO Run: CODE_GENDER INFO Run: DAYS_BIRTH INFO Run: CNT_FAM_MEMBERS INFO Run: NAME_INCOME_TYPE INFO Run: FLAG_EMAIL INFO Run: Age INFO Run: NAME_EDUCATION_TYPE INFO Run: AMT_INCOME_TOTAL INFO Selecting... INFO Match train: FLAG_PHONE_asC INFO Match train: ID_asC INFO Match train: FLAG_WORK_PHONE_asC INFO Match train: CNT_CHILDREN_asC INFO Match train: NAME_FAMILY_STATUS_asD INFO Match train: FLAG_OWN_REALTY_asD INFO Match train: FLAG_OWN_CAR_asD INFO Match train: DAYS_EMPLOYED_asC INFO Match train: OCCUPATION_TYPE_asD INFO Match train: CODE_GENDER_asD INFO Match train: CNT_FAM_MEMBERS_asC INFO Match train: DAYS_BIRTH_asC INFO Match train: NAME_INCOME_TYPE_asD INFO Match train: NAME_EDUCATION_TYPE_asD INFO Match train: Age_asC INFO Match train: AMT_INCOME_TOTAL_asC INFO Creating train data... INFO Saving train data... INFO Creating train corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating train cross table... INFO Creating train all desc tables... 
INFO Match test: FLAG_PHONE_asC INFO Match test: ID_asC INFO Match test: FLAG_WORK_PHONE_asC INFO Match test: CNT_CHILDREN_asC INFO Match test: NAME_FAMILY_STATUS_asD INFO Match test: FLAG_OWN_REALTY_asD INFO Match test: DAYS_EMPLOYED_asC INFO Match test: OCCUPATION_TYPE_asD INFO Match test: FLAG_OWN_CAR_asD INFO Match test: CODE_GENDER_asD INFO Match test: DAYS_BIRTH_asC INFO Match test: CNT_FAM_MEMBERS_asC INFO Match test: NAME_INCOME_TYPE_asD INFO Match test: Age_asC INFO Match test: NAME_EDUCATION_TYPE_asD INFO Match test: AMT_INCOME_TOTAL_asC INFO Creating test data... INFO Saving test data... INFO Creating test corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating test cross table... INFO Creating test all desc tables... INFO Match train test: OCCUPATION_TYPE_asD INFO Match train test: FLAG_OWN_REALTY_asD INFO Match train test: DAYS_EMPLOYED_asC INFO Match train test: NAME_FAMILY_STATUS_asD INFO Match train test: NAME_INCOME_TYPE_asD INFO Match train test: ID_asC INFO Match train test: CODE_GENDER_asD INFO Match train test: NAME_EDUCATION_TYPE_asD INFO Match train test: AMT_INCOME_TOTAL_asC INFO Match train test: FLAG_WORK_PHONE_asC INFO Match train test: FLAG_OWN_CAR_asD INFO Match train test: FLAG_PHONE_asC INFO Match train test: DAYS_BIRTH_asC INFO Match train test: CNT_CHILDREN_asC INFO Match train test: Age_asC INFO Match train test: CNT_FAM_MEMBERS_asC INFO Saving info... INFO Saving desc... INFO Saving cross... INFO Saving unique... INFO Saving summary... INFO Saving corr summary... INFO Saving stable_summary... INFO Saving train_stable_iv_ks... INFO Saving train_stable_psi... INFO Saving train_stable_quantile... INFO Saving test_stable_iv_ks... INFO Saving test_stable_psi... INFO Saving test_stable_quantile... INFO Saving stable_cross_psi... INFO Saving detail... INFO Saving draft... INFO Saving drop... INFO Saving json... 
INFO Saving setting... INFO Adding runtime to excel... INFO Saving directory... INFO Saving... INFO Total: 7.66 s INFO Prepare kwargs... INFO Checking train_data... INFO Checking response... INFO Checking test_data... INFO Checking response... INFO Checking columns... INFO Checking dtypes... WARNING Setting format_time_edge to True ! INFO Match train: ID_asC INFO Match train: FLAG_PHONE_asC INFO Match train: FLAG_WORK_PHONE_asC INFO Match train: CNT_CHILDREN_asC INFO Match train: NAME_FAMILY_STATUS_asD INFO Match train: FLAG_OWN_REALTY_asD INFO Match train: DAYS_EMPLOYED_asC INFO Match train: OCCUPATION_TYPE_asD INFO Match train: FLAG_OWN_CAR_asD INFO Match train: CODE_GENDER_asD INFO Match train: DAYS_BIRTH_asC INFO Match train: CNT_FAM_MEMBERS_asC INFO Match train: NAME_INCOME_TYPE_asD INFO Match train: Age_asC INFO Match train: NAME_EDUCATION_TYPE_asD INFO Match train: AMT_INCOME_TOTAL_asC INFO Creating train data... INFO Saving train data... INFO Creating train corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating train cross table... INFO Creating train all desc tables... INFO Match test: FLAG_PHONE_asC INFO Match test: ID_asC INFO Match test: FLAG_WORK_PHONE_asC INFO Match test: CNT_CHILDREN_asC INFO Match test: NAME_FAMILY_STATUS_asD INFO Match test: FLAG_OWN_REALTY_asD INFO Match test: DAYS_EMPLOYED_asC INFO Match test: OCCUPATION_TYPE_asD INFO Match test: FLAG_OWN_CAR_asD INFO Match test: CODE_GENDER_asD INFO Match test: DAYS_BIRTH_asC INFO Match test: CNT_FAM_MEMBERS_asC INFO Match test: NAME_INCOME_TYPE_asD INFO Match test: Age_asC INFO Match test: NAME_EDUCATION_TYPE_asD INFO Match test: AMT_INCOME_TOTAL_asC INFO Creating test data... INFO Saving test data... INFO Creating test corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... 
INFO Creating all corr... INFO Creating test cross table... INFO Creating test all desc tables... INFO Match train test: AMT_INCOME_TOTAL_asC INFO Match train test: NAME_EDUCATION_TYPE_asD INFO Match train test: Age_asC INFO Match train test: NAME_INCOME_TYPE_asD INFO Match train test: DAYS_BIRTH_asC INFO Match train test: CNT_FAM_MEMBERS_asC INFO Match train test: CODE_GENDER_asD INFO Match train test: FLAG_OWN_CAR_asD INFO Match train test: OCCUPATION_TYPE_asD INFO Match train test: DAYS_EMPLOYED_asC INFO Match train test: FLAG_OWN_REALTY_asD INFO Match train test: NAME_FAMILY_STATUS_asD INFO Match train test: CNT_CHILDREN_asC INFO Match train test: FLAG_WORK_PHONE_asC INFO Match train test: ID_asC INFO Match train test: FLAG_PHONE_asC INFO Saving info... INFO Saving desc... INFO Saving cross... INFO Saving unique... INFO Saving summary... INFO Saving corr summary... INFO Saving stable_summary... INFO Saving train_stable_iv_ks... INFO Saving train_stable_psi... INFO Saving train_stable_quantile... INFO Saving test_stable_iv_ks... INFO Saving test_stable_psi... INFO Saving test_stable_quantile... INFO Saving stable_cross_psi... INFO Saving detail... INFO Saving draft... INFO Saving json... INFO Saving setting... INFO Adding runtime to excel... INFO Saving directory... INFO Saving... INFO Total: 7.75 s INFO Total: 16.97 s
# Use the configuration saved for ori_name to map onto output_name1.
# Then use the output_name1 configuration to map onto output_name2
# (for this step train_name & test_names must be identical in runs 1 and 2).
ori_name = "./数据分析demo/v12/v12.xlsx"
output_name1 = "./数据分析demo/v12-rec-more/v12-one.xlsx"
output_name2 = "./数据分析demo/v12-rec-more/v12-other.xlsx"
# Reproduce the historical result 100% according to the saved configuration.
at.Analysis.data_flow(
df_train, output_name1,
test_data=[df_test, df_data], train_name="开发", test_names=["测试", "全量"],
recover_path=ori_name
)
# Inherit the configuration and retrain using cut_method "tree".
# This call reads from output_name1, so it must run after the call above.
at.Analysis.data_flow(
df_train, output_name2,
test_data=[df_test, df_data], train_name="开发", test_names=["测试", "全量"],
recover_path=output_name1, cut_method="tree"
)
INFO Prepare kwargs... INFO Checking train_data... INFO Checking response... INFO Checking test_data... INFO Checking response... INFO Checking columns... INFO Checking dtypes... WARNING Setting format_time_edge to True ! INFO Match train: FLAG_PHONE_asC INFO Match train: FLAG_WORK_PHONE_asC INFO Match train: NAME_HOUSING_TYPE_asD INFO Match train: CNT_CHILDREN_asC INFO Match train: NAME_FAMILY_STATUS_asD INFO Match train: FLAG_OWN_REALTY_asD INFO Match train: DAYS_EMPLOYED_asC INFO Match train: OCCUPATION_TYPE_asD INFO Match train: FLAG_OWN_CAR_asD INFO Match train: CODE_GENDER_asD INFO Match train: DAYS_BIRTH_asC INFO Match train: CNT_FAM_MEMBERS_asC INFO Match train: NAME_INCOME_TYPE_asD INFO Match train: FLAG_EMAIL_asC INFO Match train: Age_asC INFO Match train: NAME_EDUCATION_TYPE_asD INFO Match train: AMT_INCOME_TOTAL_asC INFO Creating train data... INFO Saving train data... INFO Creating train corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating train cross table... INFO Creating train all desc tables... INFO Match test: FLAG_PHONE_asC INFO Match test: FLAG_WORK_PHONE_asC INFO Match test: NAME_HOUSING_TYPE_asD INFO Match test: CNT_CHILDREN_asC INFO Match test: NAME_FAMILY_STATUS_asD INFO Match test: FLAG_OWN_REALTY_asD INFO Match test: DAYS_EMPLOYED_asC INFO Match test: OCCUPATION_TYPE_asD INFO Match test: FLAG_OWN_CAR_asD INFO Match test: CODE_GENDER_asD INFO Match test: DAYS_BIRTH_asC INFO Match test: CNT_FAM_MEMBERS_asC INFO Match test: NAME_INCOME_TYPE_asD INFO Match test: FLAG_EMAIL_asC INFO Match test: Age_asC INFO Match test: NAME_EDUCATION_TYPE_asD INFO Match test: AMT_INCOME_TOTAL_asC INFO Creating test data... INFO Saving test data... INFO Creating test corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... 
INFO Creating test cross table... INFO Creating test all desc tables... INFO Match train test: AMT_INCOME_TOTAL_asC INFO Match train test: NAME_EDUCATION_TYPE_asD INFO Match train test: Age_asC INFO Match train test: FLAG_EMAIL_asC INFO Match train test: NAME_INCOME_TYPE_asD INFO Match train test: CNT_FAM_MEMBERS_asC INFO Match train test: DAYS_BIRTH_asC INFO Match train test: CODE_GENDER_asD INFO Match train test: FLAG_OWN_CAR_asD INFO Match train test: OCCUPATION_TYPE_asD INFO Match train test: DAYS_EMPLOYED_asC INFO Match train test: FLAG_OWN_REALTY_asD INFO Match train test: NAME_FAMILY_STATUS_asD INFO Match train test: CNT_CHILDREN_asC INFO Match train test: NAME_HOUSING_TYPE_asD INFO Match train test: FLAG_WORK_PHONE_asC INFO Match train test: FLAG_PHONE_asC INFO Saving info... INFO Saving desc... INFO Saving cross... INFO Saving unique... INFO Saving summary... INFO Saving corr summary... INFO Saving stable_summary... INFO Saving train_stable_iv_ks... INFO Saving train_stable_psi... INFO Saving train_stable_quantile... INFO Saving test_stable_iv_ks... INFO Saving test_stable_psi... INFO Saving test_stable_quantile... INFO Saving stable_cross_psi... INFO Saving detail... INFO Saving draft... INFO Saving json... INFO Saving setting... INFO Adding runtime to excel... INFO Saving directory... INFO Saving... INFO Total: 7.07 s INFO Prepare kwargs... INFO Checking train_data... INFO Checking response... INFO Checking test_data... INFO Checking response... INFO Checking columns... INFO Checking dtypes... WARNING Setting format_time_edge to True ! 
INFO Match train: FLAG_PHONE_asC INFO Match train: FLAG_WORK_PHONE_asC INFO Match train: CNT_CHILDREN_asC INFO Match train: NAME_HOUSING_TYPE_asD INFO Match train: FLAG_OWN_REALTY_asD INFO Match train: NAME_FAMILY_STATUS_asD INFO Match train: DAYS_EMPLOYED_asC INFO Match train: OCCUPATION_TYPE_asD INFO Match train: FLAG_OWN_CAR_asD INFO Match train: CODE_GENDER_asD INFO Match train: DAYS_BIRTH_asC INFO Match train: CNT_FAM_MEMBERS_asC INFO Match train: NAME_INCOME_TYPE_asD INFO Match train: FLAG_EMAIL_asC INFO Match train: Age_asC INFO Match train: NAME_EDUCATION_TYPE_asD INFO Match train: AMT_INCOME_TOTAL_asC INFO Creating train data... INFO Saving train data... INFO Creating train corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating train cross table... INFO Creating train all desc tables... INFO Match test: FLAG_PHONE_asC INFO Match test: FLAG_WORK_PHONE_asC INFO Match test: NAME_HOUSING_TYPE_asD INFO Match test: CNT_CHILDREN_asC INFO Match test: NAME_FAMILY_STATUS_asD INFO Match test: FLAG_OWN_REALTY_asD INFO Match test: DAYS_EMPLOYED_asC INFO Match test: OCCUPATION_TYPE_asD INFO Match test: FLAG_OWN_CAR_asD INFO Match test: CODE_GENDER_asD INFO Match test: DAYS_BIRTH_asC INFO Match test: CNT_FAM_MEMBERS_asC INFO Match test: NAME_INCOME_TYPE_asD INFO Match test: FLAG_EMAIL_asC INFO Match test: Age_asC INFO Match test: NAME_EDUCATION_TYPE_asD INFO Match test: AMT_INCOME_TOTAL_asC INFO Creating test data... INFO Saving test data... INFO Creating test corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating test cross table... INFO Creating test all desc tables... 
INFO Match train test: AMT_INCOME_TOTAL_asC INFO Match train test: NAME_EDUCATION_TYPE_asD INFO Match train test: Age_asC INFO Match train test: FLAG_EMAIL_asC INFO Match train test: NAME_INCOME_TYPE_asD INFO Match train test: CNT_FAM_MEMBERS_asC INFO Match train test: DAYS_BIRTH_asC INFO Match train test: CODE_GENDER_asD INFO Match train test: FLAG_OWN_CAR_asD INFO Match train test: DAYS_EMPLOYED_asC INFO Match train test: OCCUPATION_TYPE_asD INFO Match train test: NAME_FAMILY_STATUS_asD INFO Match train test: FLAG_OWN_REALTY_asD INFO Match train test: NAME_HOUSING_TYPE_asD INFO Match train test: CNT_CHILDREN_asC INFO Match train test: FLAG_WORK_PHONE_asC INFO Match train test: FLAG_PHONE_asC INFO Saving info... INFO Saving desc... INFO Saving cross... INFO Saving unique... INFO Saving summary... INFO Saving corr summary... INFO Saving stable_summary... INFO Saving train_stable_iv_ks... INFO Saving train_stable_psi... INFO Saving train_stable_quantile... INFO Saving test_stable_iv_ks... INFO Saving test_stable_psi... INFO Saving test_stable_quantile... INFO Saving stable_cross_psi... INFO Saving detail... INFO Saving draft... INFO Saving json... INFO Saving setting... INFO Adding runtime to excel... INFO Saving directory... INFO Saving... INFO Total: 7.97 s INFO Total: 20.91 s WARNING cut_method found, enable overwriting ! WARNING Overwriting found, setting recover_from_pkl to False ! INFO Prepare kwargs... INFO Checking train_data... INFO Checking response... INFO Checking test_data... INFO Checking response... INFO Checking columns... INFO Checking dtypes... WARNING Setting format_time_edge to True ! INFO Prepare run... 
INFO Run: FLAG_MOBIL INFO Run: FLAG_PHONE INFO Run: FLAG_WORK_PHONE INFO Run: NAME_HOUSING_TYPE INFO Run: CNT_CHILDREN INFO Run: NAME_FAMILY_STATUS INFO Run: FLAG_OWN_REALTY INFO Run: DAYS_EMPLOYED INFO Run: OCCUPATION_TYPE INFO Run: FLAG_OWN_CAR INFO Run: CODE_GENDER INFO Run: DAYS_BIRTH INFO Run: CNT_FAM_MEMBERS INFO Run: NAME_INCOME_TYPE INFO Run: FLAG_EMAIL INFO Run: Age INFO Run: NAME_EDUCATION_TYPE INFO Run: AMT_INCOME_TOTAL INFO Selecting... INFO Match train: FLAG_PHONE_asC INFO Match train: FLAG_WORK_PHONE_asC INFO Match train: CNT_CHILDREN_asC INFO Match train: NAME_HOUSING_TYPE_asD INFO Match train: NAME_FAMILY_STATUS_asD INFO Match train: FLAG_OWN_REALTY_asD INFO Match train: OCCUPATION_TYPE_asD INFO Match train: DAYS_EMPLOYED_asC INFO Match train: FLAG_OWN_CAR_asD INFO Match train: CODE_GENDER_asD INFO Match train: CNT_FAM_MEMBERS_asC INFO Match train: DAYS_BIRTH_asC INFO Match train: FLAG_EMAIL_asC INFO Match train: NAME_INCOME_TYPE_asD INFO Match train: NAME_EDUCATION_TYPE_asD INFO Match train: Age_asC INFO Match train: AMT_INCOME_TOTAL_asC INFO Creating train data... INFO Saving train data... INFO Creating train corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating train cross table... INFO Creating train all desc tables... 
INFO Match test: FLAG_WORK_PHONE_asC INFO Match test: FLAG_PHONE_asC INFO Match test: NAME_HOUSING_TYPE_asD INFO Match test: CNT_CHILDREN_asC INFO Match test: NAME_FAMILY_STATUS_asD INFO Match test: FLAG_OWN_REALTY_asD INFO Match test: DAYS_EMPLOYED_asC INFO Match test: OCCUPATION_TYPE_asD INFO Match test: FLAG_OWN_CAR_asD INFO Match test: CODE_GENDER_asD INFO Match test: DAYS_BIRTH_asC INFO Match test: CNT_FAM_MEMBERS_asC INFO Match test: NAME_INCOME_TYPE_asD INFO Match test: FLAG_EMAIL_asC INFO Match test: Age_asC INFO Match test: NAME_EDUCATION_TYPE_asD INFO Match test: AMT_INCOME_TOTAL_asC INFO Creating test data... INFO Saving test data... INFO Creating test corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating test cross table... INFO Creating test all desc tables... INFO Match train test: DAYS_EMPLOYED_asC INFO Match train test: FLAG_OWN_REALTY_asD INFO Match train test: NAME_FAMILY_STATUS_asD INFO Match train test: NAME_HOUSING_TYPE_asD INFO Match train test: AMT_INCOME_TOTAL_asC INFO Match train test: NAME_INCOME_TYPE_asD INFO Match train test: DAYS_BIRTH_asC INFO Match train test: CNT_FAM_MEMBERS_asC INFO Match train test: Age_asC INFO Match train test: NAME_EDUCATION_TYPE_asD INFO Match train test: CODE_GENDER_asD INFO Match train test: OCCUPATION_TYPE_asD INFO Match train test: FLAG_WORK_PHONE_asC INFO Match train test: FLAG_OWN_CAR_asD INFO Match train test: FLAG_PHONE_asC INFO Match train test: CNT_CHILDREN_asC INFO Match train test: FLAG_EMAIL_asC INFO Saving info... INFO Saving desc... INFO Saving cross... INFO Saving unique... INFO Saving summary... INFO Saving corr summary... INFO Saving stable_summary... INFO Saving train_stable_iv_ks... INFO Saving train_stable_psi... INFO Saving train_stable_quantile... INFO Saving test_stable_iv_ks... INFO Saving test_stable_psi... INFO Saving test_stable_quantile... 
INFO Saving stable_cross_psi... INFO Saving detail... INFO Saving draft... INFO Saving json... INFO Saving setting... INFO Adding runtime to excel... INFO Saving directory... INFO Saving... INFO Total: 7.89 s WARNING cut_method found, enable overwriting ! WARNING Overwriting found, setting recover_from_pkl to False ! INFO Prepare kwargs... INFO Checking train_data... INFO Checking response... INFO Checking test_data... INFO Checking response... INFO Checking columns... INFO Checking dtypes... WARNING Setting format_time_edge to True ! INFO Prepare run... INFO Run: FLAG_MOBIL INFO Run: FLAG_PHONE INFO Run: FLAG_WORK_PHONE INFO Run: NAME_HOUSING_TYPE INFO Run: CNT_CHILDREN INFO Run: NAME_FAMILY_STATUS INFO Run: FLAG_OWN_REALTY INFO Run: DAYS_EMPLOYED INFO Run: OCCUPATION_TYPE INFO Run: FLAG_OWN_CAR INFO Run: CODE_GENDER INFO Run: DAYS_BIRTH INFO Run: CNT_FAM_MEMBERS INFO Run: NAME_INCOME_TYPE INFO Run: FLAG_EMAIL INFO Run: Age INFO Run: NAME_EDUCATION_TYPE INFO Run: AMT_INCOME_TOTAL INFO Selecting... INFO Match train: FLAG_WORK_PHONE_asC INFO Match train: FLAG_PHONE_asC INFO Match train: CNT_CHILDREN_asC INFO Match train: NAME_HOUSING_TYPE_asD INFO Match train: NAME_FAMILY_STATUS_asD INFO Match train: FLAG_OWN_REALTY_asD INFO Match train: DAYS_EMPLOYED_asC INFO Match train: OCCUPATION_TYPE_asD INFO Match train: FLAG_OWN_CAR_asD INFO Match train: CODE_GENDER_asD INFO Match train: DAYS_BIRTH_asC INFO Match train: CNT_FAM_MEMBERS_asC INFO Match train: FLAG_EMAIL_asC INFO Match train: NAME_INCOME_TYPE_asD INFO Match train: NAME_EDUCATION_TYPE_asD INFO Match train: Age_asC INFO Match train: AMT_INCOME_TOTAL_asC INFO Creating train data... INFO Saving train data... INFO Creating train corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating train cross table... INFO Creating train all desc tables... 
INFO Match test: FLAG_WORK_PHONE_asC INFO Match test: FLAG_PHONE_asC INFO Match test: NAME_HOUSING_TYPE_asD INFO Match test: CNT_CHILDREN_asC INFO Match test: NAME_FAMILY_STATUS_asD INFO Match test: FLAG_OWN_REALTY_asD INFO Match test: DAYS_EMPLOYED_asC INFO Match test: OCCUPATION_TYPE_asD INFO Match test: FLAG_OWN_CAR_asD INFO Match test: CODE_GENDER_asD INFO Match test: DAYS_BIRTH_asC INFO Match test: CNT_FAM_MEMBERS_asC INFO Match test: NAME_INCOME_TYPE_asD INFO Match test: FLAG_EMAIL_asC INFO Match test: Age_asC INFO Match test: NAME_EDUCATION_TYPE_asD INFO Match test: AMT_INCOME_TOTAL_asC INFO Creating test data... INFO Saving test data... INFO Creating test corr detail... INFO Creating (-inf, 1972-01-01] corr... INFO Creating (1972-01-01, 1984-01-01] corr... INFO Creating (1984-01-01, inf) corr... INFO Creating all corr... INFO Creating test cross table... INFO Creating test all desc tables... INFO Match train test: DAYS_EMPLOYED_asC INFO Match train test: FLAG_OWN_REALTY_asD INFO Match train test: NAME_HOUSING_TYPE_asD INFO Match train test: NAME_FAMILY_STATUS_asD INFO Match train test: AMT_INCOME_TOTAL_asC INFO Match train test: NAME_INCOME_TYPE_asD INFO Match train test: DAYS_BIRTH_asC INFO Match train test: CNT_FAM_MEMBERS_asC INFO Match train test: Age_asC INFO Match train test: NAME_EDUCATION_TYPE_asD INFO Match train test: CODE_GENDER_asD INFO Match train test: OCCUPATION_TYPE_asD INFO Match train test: FLAG_WORK_PHONE_asC INFO Match train test: FLAG_OWN_CAR_asD INFO Match train test: FLAG_PHONE_asC INFO Match train test: CNT_CHILDREN_asC INFO Match train test: FLAG_EMAIL_asC INFO Saving info... INFO Saving desc... INFO Saving cross... INFO Saving unique... INFO Saving summary... INFO Saving corr summary... INFO Saving stable_summary... INFO Saving train_stable_iv_ks... INFO Saving train_stable_psi... INFO Saving train_stable_quantile... INFO Saving test_stable_iv_ks... INFO Saving test_stable_psi... INFO Saving test_stable_quantile... 
INFO Saving stable_cross_psi... INFO Saving detail... INFO Saving draft... INFO Saving json... INFO Saving setting... INFO Adding runtime to excel... INFO Saving directory... INFO Saving... INFO Total: 8.83 s INFO Total: 22.54 s
# Pack the v12-rec-more output directory (second argument) into the .zip
# archive named by the first argument.
# NOTE(review): argument roles are inferred from the call and from the logged
# output that follows it (an existing archive at that path is removed, then
# each file under the directory is saved) -- confirm against alpha_tools.
at.dt.Api.zip_dir_files("./数据分析demo/v12-rec-more.zip", "./数据分析demo/v12-rec-more")
INFO Removing ./数据分析demo/v12-rec-more.zip... INFO Saving ./数据分析demo/v12-rec-more/Data_v12-one-开发-全量_data.pkl... INFO Saving ./数据分析demo/v12-rec-more/TRAIN_DATA_WOE_v12-one-开发-测试.csv... INFO Saving ./数据分析demo/v12-rec-more/TRAIN_DATA_WOE_v12-other-开发-全量.csv... INFO Saving ./数据分析demo/v12-rec-more/TEST_DATA_WOE_v12-one-开发-测试.csv... INFO Saving ./数据分析demo/v12-rec-more/TEST_DATA_WOE_v12-one-开发-全量.csv... INFO Saving ./数据分析demo/v12-rec-more/Data_v12-one-开发-全量.xlsx... INFO Saving ./数据分析demo/v12-rec-more/Data_v12-other-开发-测试.json... INFO Saving ./数据分析demo/v12-rec-more/Data_v12-other-开发-全量_data.pkl... INFO Saving ./数据分析demo/v12-rec-more/Data_v12-other-开发-全量.json... INFO Saving ./数据分析demo/v12-rec-more/Data_v12-one-开发-全量.json... INFO Saving ./数据分析demo/v12-rec-more/Data_v12-other-开发-测试_data.pkl... INFO Saving ./数据分析demo/v12-rec-more/TRAIN_DATA_WOE_v12-one-开发-全量.csv... INFO Saving ./数据分析demo/v12-rec-more/TRAIN_DATA_WOE_v12-other-开发-测试.csv... INFO Saving ./数据分析demo/v12-rec-more/Data_v12-other-开发-全量.xlsx... INFO Saving ./数据分析demo/v12-rec-more/Data_v12-other-开发-测试.xlsx... INFO Saving ./数据分析demo/v12-rec-more/TEST_DATA_WOE_v12-other-开发-测试.csv... INFO Saving ./数据分析demo/v12-rec-more/TEST_DATA_WOE_v12-other-开发-全量.csv... INFO Saving ./数据分析demo/v12-rec-more/Data_v12-one-开发-测试_data.pkl... INFO Saving ./数据分析demo/v12-rec-more/Data_v12-one-开发-测试.json... INFO Saving ./数据分析demo/v12-rec-more/Data_v12-one-开发-测试.xlsx...
# Build one combined report workbook (first argument) from a single analysis
# result (second argument), using "开发" as the train set name and
# "测试" / "全量" as the test set names.
# NOTE(review): the logged output shows the per-pair workbooks
# Data_v12-one-开发-测试.xlsx and Data_v12-one-开发-全量.xlsx being loaded, so
# the second argument is presumably a base name expanded with the train/test
# names rather than a file that must exist as written -- confirm in alpha_tools.
at.Report.create_data_report2(
"./数据分析demo/v12-纵向合并/v12-纵向合并.xlsx", "./数据分析demo/v12-rec-more/v12-one.xlsx",
train_name="开发", test_names=["测试", "全量"]
)
INFO Removing ./数据分析demo/v12-纵向合并/v12-纵向合并.xlsx... INFO Removing ./数据分析demo/v12-纵向合并/v12-纵向合并-DATA-开发-DATA_WOE.csv... INFO Removing ./数据分析demo/v12-纵向合并/v12-纵向合并-REPORT-开发-全量-Data.xlsx... INFO Removing ./数据分析demo/v12-纵向合并/v12-纵向合并-DATA-全量-DATA_WOE.csv... INFO Removing ./数据分析demo/v12-纵向合并/v12-纵向合并-REPORT-开发-测试-Data.xlsx... INFO Removing ./数据分析demo/v12-纵向合并/v12-纵向合并-DATA-测试-DATA_WOE.csv... INFO Loading ./数据分析demo/v12-rec-more/Data_v12-one-开发-测试.xlsx... INFO Loading ./数据分析demo/v12-rec-more/Data_v12-one-开发-全量.xlsx... INFO Creating books summary... INFO Saving... INFO Creating 变量评估-分析汇总-开发-测试... INFO Creating 变量评估-分析汇总-开发-全量... INFO Creating 变量评估-分组详情-开发-测试... INFO Creating 变量评估-分组详情-开发-全量... INFO Creating 变量评估-细分详情-开发-测试... INFO Creating 变量评估-细分详情-开发-全量... INFO Creating 变量评估-稳定性汇总-开发... INFO Creating 变量评估-稳定性汇总-测试... INFO Creating 变量评估-稳定性汇总-全量... INFO Saving... INFO Copy files... INFO Copying /home/conda_env/数据分析demo/v12-rec-more/Data_v12-one-开发-测试.xlsx... INFO Copying /home/conda_env/数据分析demo/v12-rec-more/Data_v12-one-开发-全量.xlsx... INFO Creating csv path... INFO Copying ./数据分析demo/v12-rec-more/TRAIN_DATA_WOE_v12-one-开发-测试.csv... INFO Copying ./数据分析demo/v12-rec-more/TEST_DATA_WOE_v12-one-开发-测试.csv... INFO Copying ./数据分析demo/v12-rec-more/TEST_DATA_WOE_v12-one-开发-全量.csv...
(['./数据分析demo/v12-rec-more/v12-one-开发-测试.xlsx',
'./数据分析demo/v12-rec-more/v12-one-开发-全量.xlsx'],
['开发-测试', '开发-全量'],
['', ''],
{'./数据分析demo/v12-纵向合并/v12-纵向合并-DATA-开发-DATA_WOE.csv': './数据分析demo/v12-rec-more/TRAIN_DATA_WOE_v12-one-开发-测试.csv',
'./数据分析demo/v12-纵向合并/v12-纵向合并-DATA-测试-DATA_WOE.csv': './数据分析demo/v12-rec-more/TEST_DATA_WOE_v12-one-开发-测试.csv',
'./数据分析demo/v12-纵向合并/v12-纵向合并-DATA-全量-DATA_WOE.csv': './数据分析demo/v12-rec-more/TEST_DATA_WOE_v12-one-开发-全量.csv'})
at.Report.create_data_report2(
"./数据分析demo/v12-纵横合并/v12-纵横合并.xlsx",
{
"plan1": "./数据分析demo/v12-rec-more/v12-one.xlsx",
"plan2": "./数据分析demo/v12-rec-more/v12-other.xlsx"
},
train_name="开发", test_names=["测试", "全量"]
)
INFO Removing ./数据分析demo/v12-纵横合并/v12-纵横合并.xlsx... INFO Removing ./数据分析demo/v12-纵横合并/v12-纵横合并-REPORT-plan2-开发-全量-Data.xlsx... INFO Removing ./数据分析demo/v12-纵横合并/v12-纵横合并-REPORT-plan1-开发-测试-Data.xlsx... INFO Removing ./数据分析demo/v12-纵横合并/v12-纵横合并-DATA-plan2-全量-DATA_WOE.csv... INFO Removing ./数据分析demo/v12-纵横合并/v12-纵横合并-DATA-plan1-开发-DATA_WOE.csv... INFO Removing ./数据分析demo/v12-纵横合并/v12-纵横合并-DATA-plan1-测试-DATA_WOE.csv... INFO Removing ./数据分析demo/v12-纵横合并/v12-纵横合并-DATA-plan2-测试-DATA_WOE.csv... INFO Removing ./数据分析demo/v12-纵横合并/v12-纵横合并-DATA-plan2-开发-DATA_WOE.csv... INFO Removing ./数据分析demo/v12-纵横合并/v12-纵横合并-REPORT-plan2-开发-测试-Data.xlsx... INFO Removing ./数据分析demo/v12-纵横合并/v12-纵横合并-DATA-plan1-全量-DATA_WOE.csv... INFO Removing ./数据分析demo/v12-纵横合并/v12-纵横合并-REPORT-plan1-开发-全量-Data.xlsx... INFO Loading ./数据分析demo/v12-rec-more/Data_v12-one-开发-测试.xlsx... INFO Loading ./数据分析demo/v12-rec-more/Data_v12-one-开发-全量.xlsx... INFO Loading ./数据分析demo/v12-rec-more/Data_v12-other-开发-测试.xlsx... INFO Loading ./数据分析demo/v12-rec-more/Data_v12-other-开发-全量.xlsx... INFO Creating books summary... INFO Saving... INFO Creating 变量评估-分析汇总-plan1-开发-测试... INFO Creating 变量评估-分析汇总-plan1-开发-全量... INFO Creating 变量评估-分析汇总-plan2-开发-测试... INFO Creating 变量评估-分析汇总-plan2-开发-全量... INFO Creating 变量评估-分组详情-plan1-开发-测试... INFO Creating 变量评估-分组详情-plan1-开发-全量... INFO Creating 变量评估-分组详情-plan2-开发-测试... INFO Creating 变量评估-分组详情-plan2-开发-全量... INFO Creating 变量评估-细分详情-plan1-开发-测试... INFO Creating 变量评估-细分详情-plan1-开发-全量... INFO Creating 变量评估-细分详情-plan2-开发-测试... INFO Creating 变量评估-细分详情-plan2-开发-全量... INFO Creating 变量评估-稳定性汇总-plan1-开发... INFO Creating 变量评估-稳定性汇总-plan2-开发... INFO Creating 变量评估-稳定性汇总-plan1-测试... INFO Creating 变量评估-稳定性汇总-plan1-全量... INFO Creating 变量评估-稳定性汇总-plan2-测试... INFO Creating 变量评估-稳定性汇总-plan2-全量... INFO Saving... INFO Copy files... INFO Copying /home/conda_env/数据分析demo/v12-rec-more/Data_v12-one-开发-测试.xlsx... INFO Copying /home/conda_env/数据分析demo/v12-rec-more/Data_v12-one-开发-全量.xlsx... 
INFO Copying /home/conda_env/数据分析demo/v12-rec-more/Data_v12-other-开发-测试.xlsx... INFO Copying /home/conda_env/数据分析demo/v12-rec-more/Data_v12-other-开发-全量.xlsx... INFO Creating csv path... INFO Copying ./数据分析demo/v12-rec-more/TRAIN_DATA_WOE_v12-one-开发-测试.csv... INFO Copying ./数据分析demo/v12-rec-more/TEST_DATA_WOE_v12-one-开发-测试.csv... INFO Copying ./数据分析demo/v12-rec-more/TEST_DATA_WOE_v12-one-开发-全量.csv... INFO Copying ./数据分析demo/v12-rec-more/TRAIN_DATA_WOE_v12-other-开发-测试.csv... INFO Copying ./数据分析demo/v12-rec-more/TEST_DATA_WOE_v12-other-开发-测试.csv... INFO Copying ./数据分析demo/v12-rec-more/TEST_DATA_WOE_v12-other-开发-全量.csv...
(['./数据分析demo/v12-rec-more/v12-one-开发-测试.xlsx',
'./数据分析demo/v12-rec-more/v12-one-开发-全量.xlsx',
'./数据分析demo/v12-rec-more/v12-other-开发-测试.xlsx',
'./数据分析demo/v12-rec-more/v12-other-开发-全量.xlsx'],
['plan1-开发-测试', 'plan1-开发-全量', 'plan2-开发-测试', 'plan2-开发-全量'],
['', ''],
{'./数据分析demo/v12-纵横合并/v12-纵横合并-DATA-plan1-开发-DATA_WOE.csv': './数据分析demo/v12-rec-more/TRAIN_DATA_WOE_v12-one-开发-测试.csv',
'./数据分析demo/v12-纵横合并/v12-纵横合并-DATA-plan1-测试-DATA_WOE.csv': './数据分析demo/v12-rec-more/TEST_DATA_WOE_v12-one-开发-测试.csv',
'./数据分析demo/v12-纵横合并/v12-纵横合并-DATA-plan1-全量-DATA_WOE.csv': './数据分析demo/v12-rec-more/TEST_DATA_WOE_v12-one-开发-全量.csv',
'./数据分析demo/v12-纵横合并/v12-纵横合并-DATA-plan2-开发-DATA_WOE.csv': './数据分析demo/v12-rec-more/TRAIN_DATA_WOE_v12-other-开发-测试.csv',
'./数据分析demo/v12-纵横合并/v12-纵横合并-DATA-plan2-测试-DATA_WOE.csv': './数据分析demo/v12-rec-more/TEST_DATA_WOE_v12-other-开发-测试.csv',
'./数据分析demo/v12-纵横合并/v12-纵横合并-DATA-plan2-全量-DATA_WOE.csv': './数据分析demo/v12-rec-more/TEST_DATA_WOE_v12-other-开发-全量.csv'})
at.dt.Api.zip_dir_files("./数据分析demo/v12-纵横合并.zip", "./数据分析demo/v12-纵横合并/")
INFO Removing ./数据分析demo/v12-纵横合并.zip... INFO Saving ./数据分析demo/v12-纵横合并/v12-纵横合并-REPORT-plan2-开发-全量-Data.xlsx... INFO Saving ./数据分析demo/v12-纵横合并/v12-纵横合并-REPORT-plan1-开发-测试-Data.xlsx... INFO Saving ./数据分析demo/v12-纵横合并/v12-纵横合并-DATA-plan2-全量-DATA_WOE.csv... INFO Saving ./数据分析demo/v12-纵横合并/v12-纵横合并-DATA-plan1-开发-DATA_WOE.csv... INFO Saving ./数据分析demo/v12-纵横合并/v12-纵横合并-DATA-plan1-测试-DATA_WOE.csv... INFO Saving ./数据分析demo/v12-纵横合并/v12-纵横合并-DATA-plan2-测试-DATA_WOE.csv... INFO Saving ./数据分析demo/v12-纵横合并/v12-纵横合并-DATA-plan2-开发-DATA_WOE.csv... INFO Saving ./数据分析demo/v12-纵横合并/v12-纵横合并.xlsx... INFO Saving ./数据分析demo/v12-纵横合并/v12-纵横合并-REPORT-plan2-开发-测试-Data.xlsx... INFO Saving ./数据分析demo/v12-纵横合并/v12-纵横合并-DATA-plan1-全量-DATA_WOE.csv... INFO Saving ./数据分析demo/v12-纵横合并/v12-纵横合并-REPORT-plan1-开发-全量-Data.xlsx...
# Pack the v12-纵向合并 report directory (second argument) into the .zip
# archive named by the first argument.
# NOTE(review): the logged output that follows shows the old archive being
# removed and the report workbook, per-pair REPORT workbooks and DATA_WOE csv
# files being saved; argument roles are inferred from that -- confirm against
# alpha_tools.
at.dt.Api.zip_dir_files("./数据分析demo/v12-纵向合并.zip", "./数据分析demo/v12-纵向合并/")
INFO Removing ./数据分析demo/v12-纵向合并.zip... INFO Saving ./数据分析demo/v12-纵向合并/v12-纵向合并-DATA-开发-DATA_WOE.csv... INFO Saving ./数据分析demo/v12-纵向合并/v12-纵向合并-REPORT-开发-全量-Data.xlsx... INFO Saving ./数据分析demo/v12-纵向合并/v12-纵向合并.xlsx... INFO Saving ./数据分析demo/v12-纵向合并/v12-纵向合并-DATA-全量-DATA_WOE.csv... INFO Saving ./数据分析demo/v12-纵向合并/v12-纵向合并-REPORT-开发-测试-Data.xlsx... INFO Saving ./数据分析demo/v12-纵向合并/v12-纵向合并-DATA-测试-DATA_WOE.csv...
# Pack the v9-算法对比 report directory (second argument) into the .zip
# archive named by the first argument.
# NOTE(review): unlike the two preceding zip calls, the directory path here has
# no trailing slash; the logged output looks the same either way, so this
# presumably does not matter -- confirm against alpha_tools.
at.dt.Api.zip_dir_files("./数据分析demo/v9-算法对比.zip", "./数据分析demo/v9-算法对比")
INFO Removing ./数据分析demo/v9-算法对比.zip... INFO Saving ./数据分析demo/v9-算法对比/v9-算法对比-REPORT-cumsum-Data.xlsx... INFO Saving ./数据分析demo/v9-算法对比/v9-算法对比-DATA-chimerge-验证-DATA_WOE.csv... INFO Saving ./数据分析demo/v9-算法对比/v9-算法对比-DATA-linspace-开发-DATA_WOE.csv... INFO Saving ./数据分析demo/v9-算法对比/v9-算法对比-DATA-quantile-开发-DATA_WOE.csv... INFO Saving ./数据分析demo/v9-算法对比/v9-算法对比-REPORT-linspace-Data.xlsx... INFO Saving ./数据分析demo/v9-算法对比/v9-算法对比-DATA-quantile-验证-DATA_WOE.csv... INFO Saving ./数据分析demo/v9-算法对比/v9-算法对比-DATA-tree-验证-DATA_WOE.csv... INFO Saving ./数据分析demo/v9-算法对比/v9-算法对比-DATA-tree-开发-DATA_WOE.csv... INFO Saving ./数据分析demo/v9-算法对比/v9-算法对比.xlsx... INFO Saving ./数据分析demo/v9-算法对比/v9-算法对比-DATA-kmeans-验证-DATA_WOE.csv... INFO Saving ./数据分析demo/v9-算法对比/v9-算法对比-DATA-chimerge-开发-DATA_WOE.csv... INFO Saving ./数据分析demo/v9-算法对比/v9-算法对比-DATA-ratemerge-开发-DATA_WOE.csv... INFO Saving ./数据分析demo/v9-算法对比/v9-算法对比-DATA-bestks-开发-DATA_WOE.csv... INFO Saving ./数据分析demo/v9-算法对比/v9-算法对比-DATA-cumsum-开发-DATA_WOE.csv... INFO Saving ./数据分析demo/v9-算法对比/v9-算法对比-DATA-ratemerge-验证-DATA_WOE.csv... INFO Saving ./数据分析demo/v9-算法对比/v9-算法对比-DATA-linspace-验证-DATA_WOE.csv... INFO Saving ./数据分析demo/v9-算法对比/v9-算法对比-REPORT-chimerge-Data.xlsx... INFO Saving ./数据分析demo/v9-算法对比/v9-算法对比-REPORT-bestks-Data.xlsx... INFO Saving ./数据分析demo/v9-算法对比/v9-算法对比-REPORT-quantile-Data.xlsx... INFO Saving ./数据分析demo/v9-算法对比/v9-算法对比-DATA-kmeans-开发-DATA_WOE.csv... INFO Saving ./数据分析demo/v9-算法对比/v9-算法对比-REPORT-kmeans-Data.xlsx... INFO Saving ./数据分析demo/v9-算法对比/v9-算法对比-DATA-cumsum-验证-DATA_WOE.csv... INFO Saving ./数据分析demo/v9-算法对比/v9-算法对比-REPORT-ratemerge-Data.xlsx... INFO Saving ./数据分析demo/v9-算法对比/v9-算法对比-DATA-bestks-验证-DATA_WOE.csv... INFO Saving ./数据分析demo/v9-算法对比/v9-算法对比-REPORT-tree-Data.xlsx...