package com.alpha.test;import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.Writer;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.regex.Matcher;
import java.util.regex.Pattern;/**
* 读取html页面文件解析邮箱地址
*
* @author JavaAlpha 2012-12-19 13:45:11
*/
public class ReadHtmlToTxt {

    /** Matches a single character in the range U+4E00 to U+9FA5; compiled once and reused. */
    private static final Pattern CHINESE_CHAR = Pattern.compile("[\\u4e00-\\u9fa5]");

    /** Number of characters kept on each side of '@' when cutting out an address candidate. */
    private static final int EMAIL_WINDOW = 10;

    /** Size, in chars, of the buffer used while reading the HTML file. */
    private static final int READ_BUFFER_SIZE = 4096;

    /** Suffix an address candidate must contain after the '@' to be accepted. */
    private static final String DOMAIN_SUFFIX = ".com";

    /**
     * Reads a file chunk by chunk and collects the e-mail address candidates found in it.
     *
     * <p>Each buffer-sized chunk is passed to {@link #checkEmail(String)} on its own, so at most
     * one candidate is extracted per chunk, and the candidates are concatenated without any
     * separator. The file is decoded with the platform default charset.
     *
     * @param fileName path of the file to read
     * @return the concatenated candidates; an empty string when {@code fileName} is
     *         {@code null}, the file is missing or unreadable, or nothing was found
     */
    public static String readHtml(String fileName) {
        StringBuilder emailCont = new StringBuilder();
        if (fileName == null) {
            return emailCont.toString();
        }
        File htmlFile = new File(fileName);
        if (htmlFile.exists() && htmlFile.isFile() && htmlFile.canRead()) {
            Reader in = null;
            try {
                in = new BufferedReader(new FileReader(htmlFile));
                char[] buff = new char[READ_BUFFER_SIZE];
                int nch;
                while ((nch = in.read(buff, 0, buff.length)) != -1) {
                    emailCont.append(checkEmail(new String(buff, 0, nch)));
                }
            } catch (FileNotFoundException e) {
                e.printStackTrace();
            } catch (IOException e) {
                e.printStackTrace();
            } finally {
                // Close on every path, not only after a fully successful read.
                closeAndReport(in);
            }
        }
        return emailCont.toString();
    }

    /**
     * Cuts an e-mail address candidate out of the text surrounding the first '@' in {@code str}.
     *
     * <p>This is a heuristic, not a validating parser: it takes up to {@value #EMAIL_WINDOW}
     * characters on each side of the first '@', strips a few markup and punctuation characters,
     * and truncates the result right after the first ".com" that follows the '@'. Text that
     * precedes the address inside the window is kept. Every accepted candidate is also printed
     * to standard output.
     *
     * @param str text to scan; may be {@code null}
     * @return the candidate, or an empty string when {@code str} is {@code null}, contains no
     *         '@', or has no ".com" after the '@' inside the window
     */
    public static String checkEmail(String str) {
        if (str == null) {
            return "";
        }
        int at = str.indexOf('@');
        if (at < 0) {
            return "";
        }

        // Clamp the window to the string: an '@' close to either end used to make
        // substring() throw StringIndexOutOfBoundsException.
        int from = Math.max(0, at - EMAIL_WINDOW);
        int to = Math.min(str.length(), at + EMAIL_WINDOW);
        String postCont = str.substring(from, to);

        // Slashes are only dropped when the window also contains tag brackets.
        if (postCont.indexOf('>') > -1 || postCont.indexOf('<') > -1) {
            postCont = postCont.replace(">", "").replace("<", "").replace("/", "");
        }

        // Drop commas and the ideographic full stop (U+3002). Replacing an absent character is
        // a no-op, so no guard is needed.
        postCont = postCont.replace(",", "").replace("\u3002", "");

        // Only a ".com" located after the '@' can terminate an address.
        int suffixAt = postCont.indexOf(DOMAIN_SUFFIX, postCont.indexOf('@'));
        if (suffixAt < 0) {
            // Previously this case returned the first three characters of the window.
            return "";
        }
        postCont = postCont.substring(0, suffixAt + DOMAIN_SUFFIX.length());
        System.out.println(postCont);
        return postCont;
    }

    /**
     * Tells whether the text contains at least one character in the range U+4E00 to U+9FA5.
     *
     * @param str text to inspect; may be {@code null}
     * @return {@code true} if such a character is present; {@code false} otherwise, including
     *         for {@code null} input
     */
    public static boolean checkChinese(String str) {
        return str != null && CHINESE_CHAR.matcher(str).find();
    }

    /**
     * Writes the given text to a file, replacing any previous content.
     *
     * <p>I/O failures are reported on standard error and not propagated. The text is encoded
     * with the platform default charset.
     *
     * @param cont text to write
     * @param fileName path of the target file
     */
    public static void writerFile(String cont, String fileName) {
        Writer out = null;
        try {
            // FileWriter creates the file when it does not exist yet, so no separate
            // createNewFile() call is needed.
            out = new FileWriter(new File(fileName));
            out.write(cont);
            out.flush();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Close even when write() or flush() failed.
            closeAndReport(out);
        }
    }

    /**
     * Downloads the content behind a URL and prints it to standard output.
     *
     * <p>The response is read line by line and the literal text {@code </br>} is appended after
     * every line. Malformed URLs and I/O failures are reported on standard error; whatever was
     * read up to that point is still printed.
     *
     * @param strUrl address to fetch, for example {@code http://www.163.com}
     */
    public static void readUrlCont(String strUrl) {
        StringBuilder cont = new StringBuilder();
        BufferedReader reader = null;
        try {
            URL url = new URL(strUrl);
            URLConnection conn = url.openConnection();
            reader = new BufferedReader(new InputStreamReader(conn.getInputStream()));
            String lineCont;
            while ((lineCont = reader.readLine()) != null) {
                cont.append(lineCont).append("</br>");
            }
        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Release the connection's stream on failure as well as on success.
            closeAndReport(reader);
        }
        System.out.println(cont.toString());
    }

    /**
     * Demo entry point: fetches one web page and prints it.
     *
     * <p>The file-based helpers can be exercised the same way, by passing the result of
     * {@link #readHtml(String)} to {@link #writerFile(String, String)}.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        readUrlCont("http://www.163.com");
    }

    /** Closes the resource if it was opened, reporting (not propagating) a failure to close. */
    private static void closeAndReport(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
//java/5981