package com.webssky.jcseg.filter;
import java.util.HashMap;
import java.util.Map;
/**
 * Helpers for recognising Chinese numeral characters and converting
 * Chinese numeral strings to Arabic integers. <br />
 *
 * All conversions use <code>int</code> arithmetic, so a value above
 * {@link Integer#MAX_VALUE} (roughly anything past 二十一亿) silently
 * overflows.
 *
 * @author chenxin
 * @see <a href="http://www.webssky.com">http://www.webssky.com</a>
 */
public class CNNMFilter {

    /**
     * Chinese numeric chars. <br />
     * Older versions kept these chars in the lexicon file
     * lex-cn-numeric.lex. <br />
     * Entries 0-8 are the everyday digits 1-9 and entries 9-17 the
     * financial digits 1-9; the static initializer depends on that order.
     */
    private static final char[] CN_NUMERIC = {
        '一', '二', '三', '四', '五', '六', '七', '八', '九',
        '壹', '贰', '叁', '肆', '伍', '陆', '柒', '捌', '玖',
        '十', '百', '千', '拾', '佰', '仟', '万', '亿', '○', 'O', '零'
    };

    /** Section units scanned by {@link #cnNumericToArabic}, largest first. */
    private static final char[] UNITS = { '亿', '万', '千', '百' };

    /** Multiplier of the unit stored at the same index of {@link #UNITS}. */
    private static final int[] UNIT_VALUES = { 100000000, 10000, 1000, 100 };

    /** Numeral char -> value. The zero glyphs are deliberately absent. */
    private static final Map<Character, Integer> cnNumeric
        = new HashMap<Character, Integer>(40, 0.85f);

    static {
        // everyday digits 一..九 -> 1..9
        for ( int j = 0; j < 9; j++ ) {
            cnNumeric.put(CN_NUMERIC[j], j + 1);
        }
        // financial digits 壹..玖 -> 1..9
        for ( int j = 9; j < 18; j++ ) {
            cnNumeric.put(CN_NUMERIC[j], j - 8);
        }
        cnNumeric.put('两', 2);
        cnNumeric.put('十', 10);
        cnNumeric.put('拾', 10);
        cnNumeric.put('百', 100);
        cnNumeric.put('佰', 100);
        cnNumeric.put('千', 1000);
        cnNumeric.put('仟', 1000);
        cnNumeric.put('万', 10000);
        cnNumeric.put('亿', 100000000);
    }

    /**
     * Look up the value of a Chinese numeric char. <br />
     *
     * @param c the char to check
     * @return the value of the char (1-9 for digits, 10, 100, 1000, 10000
     *         or 100000000 for units), or -1 when the char has no entry in
     *         the value table. The zero glyphs (零, ○, O) have no entry, so
     *         they yield -1 as well.
     */
    public static int isCNNumeric( char c ) {
        Integer value = cnNumeric.get(c);
        return value == null ? -1 : value.intValue();
    }

    /**
     * Turn a Chinese numeric string into an Arabic number. <br />
     *
     * Examples: <code>"三亿二千零六万七千五百六"</code> gives 320067560,
     * <code>"一千五"</code> gives 1500 (a single digit directly after a unit
     * counts as a tenth of that unit) and <code>"一九九八"</code> gives 1998
     * (no unit chars at all, so the digits are read positionally). <br />
     *
     * A string of exactly one char returns that char's value, with the zero
     * glyphs giving 0 and an unknown char giving -1. Unknown chars inside a
     * longer string are not rejected; each one contributes -1 at its
     * position.
     *
     * @param cnn the Chinese numeric string; must not be null
     * @param flag true to normalize the input first: the financial units
     *        佰, 仟 and 拾 are replaced by 百, 千 and 十, and 零 is replaced
     *        by a blank. Pass false when that has already been done.
     * @return the Arabic value of the string
     */
    public static int cnNumericToArabic( String cnn, boolean flag ) {
        cnn = cnn.trim();
        if ( cnn.length() == 1 ) {
            return digitValue(cnn.charAt(0));
        }
        if ( flag ) {
            cnn = cnn.replace('佰', '百').replace('仟', '千')
                     .replace('拾', '十').replace('零', ' ');
        }

        int val = 0;
        for ( int u = 0; u < UNITS.length; u++ ) {
            int idx = cnn.lastIndexOf(UNITS[u]);
            if ( idx < 0 ) {
                continue;
            }
            // everything left of the unit is its coefficient
            val += cnNumericToArabic(cnn.substring(0, idx), false) * UNIT_VALUES[u];
            cnn = cnn.substring(idx + 1);

            // Shorthand such as 一千五 (= 1500): one trailing digit means a
            // tenth of the unit. Only real digits 1-9 qualify; anything else
            // (十, a zero glyph, an unknown char) is left for the code below.
            if ( cnn.length() == 1 ) {
                int digit = isCNNumeric(cnn.charAt(0));
                if ( digit >= 1 && digit <= 9 ) {
                    val += digit * (UNIT_VALUES[u] / 10);
                    cnn = "";
                }
            }
        }

        int shi = cnn.lastIndexOf('十');
        if ( shi > -1 ) {
            // A bare 十 stands for "one ten"; the trim also covers the blank
            // left by a normalized 零, as in 零十五.
            String tens = cnn.substring(0, shi).trim();
            val += (tens.length() == 0 ? 1 : cnNumericToArabic(tens, false)) * 10;
            cnn = cnn.substring(shi + 1);
        }

        return val + positionalValue(cnn.trim());
    }

    /**
     * Quick conversion of a pure digit string such as
     * <code>"一九九八"</code> (= 1998): every char is read as one decimal
     * place, most significant first. <br />
     *
     * The zero glyphs count as 0. Unit chars are not interpreted as
     * multipliers here; use {@link #cnNumericToArabic} for strings that
     * contain them.
     *
     * @param cnn the Chinese digit string; must not be null
     * @return the Arabic value of the string
     */
    public static int qCNNumericToArabic( String cnn ) {
        return positionalValue(cnn.trim());
    }

    /** Read <code>digits</code> as a base-10 number, one char per place. */
    private static int positionalValue( String digits ) {
        int val = 0;
        for ( int j = 0; j < digits.length(); j++ ) {
            val = val * 10 + digitValue(digits.charAt(j));
        }
        return val;
    }

    /**
     * Value of one char inside a number: 0 for the zero glyphs and for the
     * blank that normalization substitutes for 零, otherwise whatever
     * {@link #isCNNumeric} reports.
     */
    private static int digitValue( char c ) {
        switch ( c ) {
        case '零':
        case '○':
        case 'O':
        case '\u3007':    // ideographic number zero, as in 二〇〇八
        case ' ':
            return 0;
        default:
            return isCNNumeric(c);
        }
    }
}
//java/5577