决策表快速排序

一、说明。

所谓决策表,,类似于关系数据库的二位数据表,形如:43 01 0 181 01 2 01 2 173 174 0

排序后输出:

1 0 11 2 01 2 14 3 07 3 17 4 08 1 0

二、问题由来。决策表约简是粗糙集的一个经典问题。关于如何解释粗糙集约简问题,我有一个很简单的解释,不过不会在这里写出。简而言之约简就是在保持原有数据集分类能力的前提下删除冗余属性。粗糙集的创始者Pawlak有着一个近乎偏执的理念:知识就是分类。

完成分类是进一步完成粗糙集约简的基础。所以针对如何分类就有了各种各样的解法。

蛮力算法就是两两比较,完成分类,这个复杂度很高。在这种情况下,先排序再分类是一个进步的方法。当然排序的方法也很多,基数排序、快速排序都是排序,也的确都有人进行过尝试。

我这里的这个排序方法来自于《计算机学报》上的一篇《属性序下的快速约简算法》。文章的作者当时发了两篇文章,这篇约简的文章建立在另外一篇《二维表快速排序的复杂度分析》之上。

这里我只是简单实现了原文算法。

三、实现代码,只是想重复这个实验,然后用我的方法与此相比较。

1 #include <stdlib.h> 2 #include <string.h> 3 #include <stdio.h> 4 #include <math.h> 5 #include <time.h> 6 #include <windows.h>AttOrderTerminator = -1; 10 11 typedef struct tagConditionClass{terminalRowNO; available; }ConditionClass; tagDecisionTableEX{ 18DecisionTable * table; 19int * tblIdx;; 21int to; 22 }; 23 typedef tagDecisionTableEX DecisionTableEX;partition(DecisionTable * table, int * tblIdx, int stage, int low, int high); 26 int TDQuicksort(DecisionTable * table, int * tblIdx, int stage, int low, int high); 27 int loadDecisionTablePositiveRegion(DecisionTable * table, int * tblIdx, bool * tblPositiveRegion); 28 int partitionMatrix(DecisionTableEX * tex, int * attOrder, int stage, int * nonEmptyLabel, bool * tblPositiveRegion); 29 int attOrderReduction();partition(DecisionTable * table, int * tblIdx, int stage, int low, int high){ 32TableElement * s = table->dataCenter; 33int ext = table->extCdnAttribCount; 34int t;mid = low; 37int hiEnd = mid+1; 38int counter = 0; 39for(int i=low+1; i<=high; i++){= s[tblIdx[low] * ext + stage]; 41int element = s[tblIdx[i] * ext + stage]; 42if (element < ref){ 43mid++; 44hiEnd++; 45t = tblIdx[mid]; 46tblIdx[mid] = tblIdx[i]; 47tblIdx[i] = t; 48 } 49if(element == ref){ 50t = tblIdx[i]; 51tblIdx[i] = tblIdx[hiEnd]; 52tblIdx[hiEnd] = t; 53hiEnd++; 54counter++; 55 } 56 } 57 58t = tblIdx[low]; 59tblIdx[low] = tblIdx[mid]; 60tblIdx[mid] = t;(mid == low) return mid + counter;mid-1; 65 }TDQuicksort(DecisionTable * table, int * tblIdx, int stage, int low, int high){ 68TableElement * s = table->dataCenter; 69int ext = table->extCdnAttribCount;(stage > table->cdnAttributeCount) return 0;;NextDemension = false; 75for (int i=low+1; i<=high; i++) 76if ( s[tblIdx[i] * ext + stage] != s[tblIdx[low] * ext + stage]){ 77NextDemension = true; 78break; 79 } (NextDemension){ 82int mid = partition(table, tblIdx, stage, low, high); 83 TDQuicksort(table, tblIdx, stage, low, mid); 84TDQuicksort(table, tblIdx, stage, mid+1, high); 85 }(!NextDemension){ 88TDQuicksort(table, tblIdx, stage+1, low, high); 89 }; 92 }loadDecisionTablePositiveRegion(DecisionTable * table, int * tblIdx, bool * tblPositiveRegion){ 95int CdnEquClsNO = table->elementCount+2; 96int cdnClsPointer = 0;cdn = table->cdnAttributeCount; 99int ext = table->extCdnAttribCount;100int tfsi = table->elementCount;101102ConditionClass * cdnCls = NULL;103HANDLE heap = NULL;cc = table->cdnCmp;106 107heap = HeapCreate(HEAP_NO_SERIALIZE|HEAP_GENERATE_EXCEPTIONS, 1024*1024, 0);108if (heap != NULL){109cdnCls = (ConditionClass * )HeapAlloc(heap, 0, CdnEquClsNO * sizeof(ConditionClass));110 }111MakeSure(cdnCls != NULL);112SecureZeroMemory(cdnCls, CdnEquClsNO * sizeof(ConditionClass));= 0;115while(from < tfsi){116int duplicate = 0;117int i=from;118BigInt * src64 = (BigInt *)(table->dataCenter + tblIdx[from] * ext);cdnCls[cdnClsPointer].available = true;(true){125bool bird = true;126if (i == tfsi) break;127BigInt * dst64 = (BigInt *)(table->dataCenter + tblIdx[i] * ext);128for(int m=0; m<cc; m++)129if(src64[m]^dst64[m]){130bird = false;131break;132 }133if (!bird) break;(cdnCls[cdnClsPointer].available == true)136if (table->dcnElement[tblIdx[i]] != table->dcnElement[tblIdx[from]]){137cdnCls[cdnClsPointer].available = false;138 }139140i++;141duplicate++;142 }+= duplicate;145cdnCls[cdnClsPointer].terminalRowNO = from – 1;146cdnClsPointer++;147 }(int i=0; i<cdnClsPointer; i++){150int start = cdnCls[i].deputyRowNO;151int terminal = cdnCls[i].terminalRowNO;152if (cdnCls[i].available){153for (int m=start; m<=terminal; m++) tblPositiveRegion[tblIdx[m]] = true;154 }155if (!cdnCls[i].available){156for (int m=start; m<=terminal; m++) tblPositiveRegion[tblIdx[m]] = false;157 }158 }159 160HeapFree(heap, 0, cdnCls);161 HeapDestroy(heap);;164 }partitionMatrix(DecisionTableEX * tex, int * attOrder, int stage, int * nonEmptyLabel, bool * tblPositiveRegion){167DecisionTable * table = tex->table;168int * tblIdx = tex->tblIdx;= tex->from;170int to = tex->to;(attOrder[stage] != AttOrderTerminator){;noPRelement = true;176for (int i=from; i<=to; i++)177if (tblPositiveRegion[tblIdx[i]]){178noPRelement = false;179break;180 };cannotDistinguishInStage = true;184int ext = table->extCdnAttribCount;185TableElement * s = table->dataCenter;186for (int i=from; i<=to; i++)187if (s[tblIdx[i] * ext + attOrder[stage]] != s[tblIdx[from] * ext + attOrder[stage]]){188cannotDistinguishInStage = false;189break;190 }191if (cannotDistinguishInStage) partitionMatrix(tex, attOrder, stage+1, nonEmptyLabel, tblPositiveRegion);(cannotDistinguishInStage == false){194nonEmptyLabel[stage] = 1;195int sum = 0;196double avg = 0;197for (int i=from; i<=to; i++) sum += s[tblIdx[i] * ext + attOrder[stage]];198avg = ((double)sum) / (to – from + 1);199int mid = from -1;200for (int i=from; i<=to; i++){201if (s[tblIdx[i] * ext + attOrder[stage]] <= avg){202mid++;203int t = tblIdx[mid];204tblIdx[mid] = tblIdx[i];205tblIdx[i] = t;206 }207 }208//if (mid==from || mid==to) __debugbreak();tex->from = from;212tex->to = mid;213 partitionMatrix(tex, attOrder, stage, nonEmptyLabel, tblPositiveRegion);214215tex->from = mid+1;216tex->to = to;217 partitionMatrix(tex, attOrder, stage, nonEmptyLabel, tblPositiveRegion);218 }stage++;222 };225 } attOrderReduction(){228 DecisionTable table;229 time_t timeBegin;230 time_t timeEnd;231char fileName[MAX_STR];232 233beginDecisionTable(&table););, fileName, MAX_STR););238timeBegin = clock();239fillTableWithFile(&table, fileName);240timeEnd = clock();, (double)(timeEnd-timeBegin)/CLOCKS_PER_SEC););timeBegin = clock();246int * tblIdx = (int *)malloc(table.elementCount * sizeof(int));247bool * tblPositiveRegion = (bool *)malloc(table.elementCount * sizeof(bool));248for (int i=0; i<table.elementCount; i++) tblIdx[i]=i;249250TDQuicksort(&table, tblIdx, 0, 0, table.elementCount-1);251//TDQuicksort test code252//FILE * fp;253//fopen_s(&fp, “r8.txt”, “w+”);254//for (int i=0; i<table.elementCount; i++){255// for (int m=0; m<table.cdnAttributeCount; m++)256//fprintf(fp, “%8d”, table.dataCenter[ tblIdx[i]*table.extCdnAttribCount + m ]);257// fprintf(fp, “\n”);258//};262 } main(){265 attOrderReduction();;268 }向上攀爬的。

决策表快速排序

相关文章:

你感兴趣的文章:

标签云: