大作业 数据清洗(缺省值处理)
这次实现了缺省值处理部分内容,主要有将缺省值按中位数,平均值补全。将缺省行或者列去掉。
#缺省值补全 @app.route('/data_clean_supply') def data_clean_supply(): supply_type = request.values.get("supply_type") flag='1' try: global data_clean data_clean =dataclean.data_clean_supply(data_clean,supply_type) # 去重后的数据保存,为以后处理缺省值 except (Exception, BaseException) as e: exstr = traceback.format_exc() print( exstr) flag='0' print(supply_type+"flag:"+flag) return jsonify({"cog":flag}) pass #缺省值去除 @app.route("/data_clean_remove") def data_clean_remove(): action_on = str(request.values.get("action_on")) type_on = str(request.values.get("type_on")) min_num = str(request.values.get("min_num")) flag='1' try: global data_clean dataclean.data_clean_remove(data_clean,action_on,type_on,min_num) except (Exception, BaseException) as e: exstr = traceback.format_exc() print(exstr) flag='0' print( "min_num:"+min_num+",type_on:"+type_on+",action_on:"+action_on+",flag:"+flag) return jsonify({"cog":flag}) pass
#获取要补充的数值 def data_clean_supply_num(temp_col_not_nan,suplly_type): if (suplly_type == "median"): return temp_col_not_nan.median() if(suplly_type == "mean"): return temp_col_not_nan.mean() #补全缺省值 def data_clean_supply(data_clean,suplly_type): for i in range(data_clean.shape[1]):#遍历列数 try: temp_col=data_clean.iloc[:,i]#取一列 temp_col_not_nan=temp_col[temp_col==temp_col].astype('float')#当前一列不为nan,使用布尔索引 mean=data_clean_supply_num(temp_col_not_nan,suplly_type) flag_list=[] num=len(temp_col) for j in range(num): if(temp_col.iloc[j]!=temp_col.iloc[j]): flag_list.append(mean) else: flag_list.append(temp_col.iloc[j]) data_clean.iloc[:,i]=flag_list# 填充均值,必须整列赋值 except: pass #print("不是数字类型") return data_clean pass #去除缺省行或列 def data_clean_remove(data_clean,action_on,type_on,minnum): return data_clean.dropna(axis=action_on, how=type_on, thresh=int(minnum)) pass
有一点要注意由于从数据库中读出的缺省值是‘’而不是nan所以要转为nan
#把‘’转换为nan for j in range(num_1): flag_list=[] for i in range(num_0): if(df.iloc[i][j]==''): flag_list.append(np.nan) else: flag_list.append(df.iloc[i][j]) df.iloc[:,j]=flag_list#必须整列赋值,如果单个赋值则会失败
前台部分:
<blockquote class="layui-elem-quote layui-text"> <h2>缺省值处理h2> blockquote> <fieldset class="layui-elem-field layui-field-title" style="margin-top: 30px;"> <legend>缺省值补全legend> fieldset> <form class="layui-form" action=""> <div class="layui-form-item"> <label class="layui-form-label">补全方式label> <div class="layui-input-block"> <select name="interest" lay-filter="aihao" id="supply_type"> <option value="">option> <option value="median">中位数option> <option value="mode" selected="">众数option> <option value="average">平均数option> select> <button type="button" class="layui-btn layui-btn-normal" id="supply_submit" onclick="supply_submit">确定button> div> div> form> <fieldset class="layui-elem-field layui-field-title" style="margin-top: 30px;"> <legend>缺省值去除legend> fieldset> <form class="layui-form" action=""> <div class="layui-form-item"> <label class="layui-form-label">作用于label> <div class="layui-input-block"> <input type="radio" name="action_on" value="index" title="行" checked=""> <input type="radio" name="action_on" value="columns" title="列"> div> div> <div class="layui-form-item"> <label class="layui-form-label">非空值最小数label> <div class="layui-input-block"> <input type="text" id="min_num" lay-verify="title" autocomplete="off" placeholder="非空值小于此数目的行或列将被删除" class="layui-input"> div> div> <div class="layui-form-item"> <label class="layui-form-label">方式label> <div class="layui-input-block"> <input type="radio" name="type_on" value="all" title="all" checked=""> <input type="radio" name="type_on" value="any" title="any"> div> <button type="button" class="layui-btn layui-btn-normal" id="remove_submit" >确定button> div> form>