// Formats the value stored behind m_pValue as fixed-point text using
// m_nDecimals digits after the decimal point, and returns it with any
// leading whitespace stripped.
// NOTE(review): m_pValue is dereferenced without a validity check (the
// original assert on the value-indicator byte was already disabled).
CTString CCurrencyField::GetAsString()
{
    // Build the printf-style pattern first, e.g. "%0.2f" for two decimals.
    // The width is deliberately 0 so the text is not padded to m_nWidth.
    CTString pattern;
    pattern.Format("%%%d.%df", 0, m_nDecimals);

    CTString text;
    text.Format(pattern, *((FieldNUM *)m_pValue));
    text.TrimLeft();

    return text;
}
/************************************************************************************
Function:    CField* CFields::NewFieldType(CFields* pOwner, CFieldType chType)
Purpose:     Creates a new field object matching the requested field type.
Details:     1. Returns the newly created field; it is given a default name
                of the form "变量NNNN" that is not yet used in this field list.
             2. Returns NULL when chType is not one of the supported types.
Parameters:
    [in]:    1. pOwner: field collection stored into the new field's m_pFields.
             2. chType: the requested field type.
    [out]:   none.
Returns:     CField* (NULL for an unsupported chType)
Author:      YTLI 2002/07/15
Changes:     Guard against an unsupported chType before touching the new field.
***********************************************************************************/
CField* CFields::NewFieldType(CFields* pOwner ,CFieldType chType)
{
    CField* pField = NULL; // stays NULL when chType is not supported

    switch (chType)
    {
    case fString :
        pField = new CStringField(this);
        break;
    case fDouble :
        pField = new CDoubleField(this);
        break;
    case fInt :
        pField = new CIntField(this);
        break;
    case fBoolean :
        pField = new CBooleanField(this);
        break;
    case fDate :
        pField = new CDateField(this);
        break;
    case fCurrency :
        pField = new CCurrencyField(this);
        break;
    default:
        break;
    }

    // Honour the documented contract: an unsupported type yields NULL.
    // Previously execution continued and dereferenced the NULL pointer below.
    if (pField == NULL)
    {
        return NULL;
    }

    // Default field name (zlq, 2002-9-24).
    CTString str;
    int nCount = m_FieldVersion.GetAbsoluteFieldCount();
    do
    {
        // Keep generating until the name is unique at creation time (zlq, 2003-5-28).
        str.Format("%d", nCount + 1);
        while (str.GetLength() < 4)
        {
            // Left-pad the number with zeros to four characters (zlq, 2002-11-15).
            str = "0" + str;
        }
        str = "变量" + str;
        nCount++;
    } while (FieldByName(str) != NULL);

    pField->SetFieldName(str);
    pField->m_pFields = pOwner;
    return pField;
}
// Counts how many times the line-feed separator occurs in `string` and
// returns that count.
int CRsltElementText::GetStringRow(CTString string)
{
    // The separator is produced through Format, yielding a single "\n".
    CTString lineBreak;
    lineBreak.Format("%s\n", "");

    int lineBreaks = 0;
    // Each search resumes one character past the previous hit.
    for (int pos = string.Find(lineBreak, 0); pos >= 0; pos = string.Find(lineBreak, pos + 1))
    {
        ++lineBreaks;
    }
    return lineBreaks;
}
void CMarkovChain::DrawResult() { if (m_nTotalID > 1000 || m_bSaveData) { //在主界面输出聚类结果 int i; CDataAccess DataAcc; CField *pUserIdNum = DataAcc.m_pFieldList->CreateField(fInt); CField *pCluster = DataAcc.m_pFieldList->CreateField(fInt); pUserIdNum->SetFieldName("用户序号"); pCluster->SetFieldName("分类结果"); DataAcc.m_pFieldList->Add(pUserIdNum); DataAcc.m_pFieldList->Add(pCluster); for ( i=0; i<m_nTotalID; i++) { DataAcc.Append(); pUserIdNum->SetAsInteger(i+1); pCluster->SetAsInteger(m_VecClus(i) ); } BOOL bSucSave;//保存文件 CTString NoUse, FileName; if (m_bSaveData) { bSucSave = DataAcc.SaveFile(m_DataFile, &NoUse); FileName = m_DataFile; } else { CTString strOutNameTemp = "_马尔可夫链聚类结果"; CTString strOrgNameTemp = m_pDataInterface->m_DataAccess.GetFileName(); FileName = CFileReadWrite::GetSaveFileName(strOrgNameTemp, strOutNameTemp); bSucSave = DataAcc.SaveFile(FileName, &NoUse); } CRsltElementText * pWarningTextRslt = new CRsltElementText( "马尔可夫链分类结果" ); CTString strWarn; if (bSucSave) strWarn = "模型运行结束,结果已经成功输出到"+FileName+"文件里。"; else strWarn = "模型运行结束,结果已经成功输出到"+NoUse+"文件里。"; pWarningTextRslt->AddString(strWarn); m_pResult->Add( pWarningTextRslt ); return; } else { CTLTable * pTable = new CTLTable; CRsltElementTable * pETable = new CRsltElementTable("马尔可夫链分类结果",pTable);//申请一个表对象类的对象 pTable->CreateTable(2, 2); pTable->SetTitle("马尔可夫链分类情况"); pTable->SetCols(2); pTable->SetRows(m_nTotalID+1); pTable->InsertColumn(0,"用户序号"); pTable->InsertColumn(1,"分类结果"); CTString str; for(int i=0;i<m_nTotalID;i++) { str.Format("%d",i+1); pTable->InsertItem(i,str); pTable->SetItemText(i, 1, m_VecClus(i) ); } m_pResult->Add(pETable); return; } }
BOOL CMarkovChain::Main() { int id = 0; int L = 0; int i = 0; int j,k,l,t; m_nPageNum = m_Path_vec.Max(); CDoubleVector start_vec(m_nRow); CDoubleVector pairID_vec(m_nRow); CDoubleVector length_vec(m_nRow, 0); // (Preprocessing) begin // while (i < m_nRow) { L=0; start_vec(id) = i; pairID_vec(id) = m_ID_vec(i); while (pairID_vec(id) == m_ID_vec(i)) { length_vec(id)++; i++; if (i >= m_nRow) { break; } } id++; } m_nTotalID = id; start_vec.resize(m_nTotalID); pairID_vec.resize(m_nTotalID); length_vec.resize(m_nTotalID); CDoubleVector score(28); // (Preprocessing) end // // (Model selection) begin // if (m_bAutoCluster)//如果自动计算KClusters { int cutting = min(0.4 * m_nTotalID, 5000); int KClusters; for (KClusters=3; KClusters<15; KClusters++) { CDoubleMatrix responsibility(cutting, KClusters); // (Initialization) begin CDoubleVector Pi_vec(KClusters, 1.0/KClusters); CDoubleMatrix Theta_Init_mx(KClusters, m_nPageNum); srand(GetTickCount()); for (k=0; k<KClusters; k++) { for (j=0; j<m_nPageNum; j++) { Theta_Init_mx(k,j) = rand(); } } for (k=0; k<KClusters; k++) { double s = 0; for (j=0; j<m_nPageNum; j++) { s += Theta_Init_mx(k,j); } for (j=0; j<m_nPageNum; j++) { Theta_Init_mx(k,j) = Theta_Init_mx(k,j)/s; } } CDoubleMatrix * Theta_Trans= new CDoubleMatrix[KClusters]; for (k=0; k<KClusters; k++) { CDoubleMatrix Theta_Trans_mx(m_nPageNum, m_nPageNum); for (i=0; i<m_nPageNum; i++) { for (j=0; j<m_nPageNum; j++) { Theta_Trans_mx(i, j) = rand(); } } Theta_Trans[k] = Theta_Trans_mx; } for (k=0; k<KClusters; k++) { for (j=0; j<m_nPageNum; j++) { double s = 0; for (l=0; l<m_nPageNum; l++) { s += Theta_Trans[k](j,l); } for (l=0; l<m_nPageNum; l++) { Theta_Trans[k](j,l) = Theta_Trans[k](j,l)/s; } } } // (Initialization) end EMalgorithm(Pi_vec, Theta_Init_mx, Theta_Trans, responsibility, start_vec, length_vec, pairID_vec, KClusters, cutting); // (Compute Score) begin double LS2 = 0; double total_length = 0; for (i=cutting; i<m_nTotalID; i++) { double like = 0; for (k=0; k<KClusters; 
k++) { L = length_vec(i); double temp = 1; for (t=start_vec(i); t<start_vec(i)+L-1; t++) { temp *= Theta_Trans[k](m_Path_vec(t)-1,m_Path_vec(t+1)-1); } like += temp * Pi_vec(k) * Theta_Init_mx(k,m_Path_vec(start_vec(i))-1); } LS2 += log(like)/log(2); } for (i=0; i<m_nTotalID; i++) { total_length += length_vec(i); } score(KClusters-3) = -(LS2/total_length)+(0.01*KClusters); // (Compute Score) end } } for (i=0; i<28; i++) { if (score(i) < score(m_nCluster-3)) { m_nCluster = i+3; } } // (Model selection) end // // (Training) begin // CDoubleMatrix responsibility(m_nTotalID, m_nCluster); // (Initialization) CDoubleVector Pi_vec(m_nCluster, 1.0/m_nCluster); // Pi CDoubleMatrix Theta_Init_mx(m_nCluster, m_nPageNum); // Theta_Init srand(GetTickCount()); for (k=0; k<m_nCluster; k++) { for (j=0; j<m_nPageNum; j++) { Theta_Init_mx(k,j) = rand(); } } for (k=0; k<m_nCluster; k++) // normalization { double s = 0; for (j=0; j<m_nPageNum; j++) { s += Theta_Init_mx(k,j); } for (j=0; j<m_nPageNum; j++) { Theta_Init_mx(k,j) = Theta_Init_mx(k,j)/s; } } CDoubleMatrix * Theta_Trans= new CDoubleMatrix[m_nCluster]; // Theta_Trans for (k=0; k<m_nCluster; k++) { CDoubleMatrix Theta_Trans_mx(m_nPageNum, m_nPageNum); for (i=0; i<m_nPageNum; i++) { for (j=0; j<m_nPageNum; j++) { Theta_Trans_mx(i, j) = rand(); } } Theta_Trans[k] = Theta_Trans_mx; } for (k=0; k<m_nCluster; k++) // normalization { for (j=0; j<m_nPageNum; j++) { double s = 0; for (l=0; l<m_nPageNum; l++) { s += Theta_Trans[k](j,l); } for (l=0; l<m_nPageNum; l++) { Theta_Trans[k](j,l) = Theta_Trans[k](j,l)/s; } } } // (hyperparameter) double alpha_IT = 0.01 / m_nPageNum; // EM training EMalgorithm(Pi_vec, Theta_Init_mx, Theta_Trans, responsibility, start_vec, length_vec, pairID_vec, m_nCluster, m_nTotalID); // (Training) end // // (Hard assignment) begin // m_VecClus.create(m_nTotalID); for (i=0; i<m_nTotalID; i++) { int which = 0; for (k=0; k<m_nCluster; k++) { if (responsibility(i, which) > responsibility(i,k)) { which = k; } } 
m_VecClus(i) = which; } //输出模型信息到 m_strModel CTString strTemp; m_strModel = ""; strTemp.Format("%d", m_nCluster); m_strModel += strTemp; m_strModel += " "; strTemp.Format("%d", m_nPageNum); m_strModel += strTemp; m_strModel += " "; for (k=0; k<m_nCluster; k++) { for (j=0; j<m_nPageNum; j++) { for (i=0; i<m_nPageNum; i++) { strTemp.Format("%.9f", Theta_Trans[k](i,j)); m_strModel += strTemp; m_strModel += " "; } } } //输出模型信息到 m_strModel 完成 if (m_bSaveModel)//保存模型 CFileReadWrite::WriteStrToFile(m_ModelFile, m_strModel); // (Hard assignment) end // return TRUE; }