[Java] Java读取Html文本解析email地址的代码 →→→→→进入此内容的聊天室

来自 , 2020-09-24, 写在 Java, 查看 107 次.
URL http://www.code666.cn/view/154aa686
  1. package com.alpha.test;import java.io.BufferedReader;
  2. import java.io.File;
  3. import java.io.FileNotFoundException;
  4. import java.io.FileReader;
  5. import java.io.FileWriter;
  6. import java.io.IOException;
  7. import java.io.InputStreamReader;
  8. import java.io.Reader;
  9. import java.io.Writer;
  10. import java.net.MalformedURLException;
  11. import java.net.URL;
  12. import java.net.URLConnection;
  13. import java.util.regex.Matcher;
  14. import java.util.regex.Pattern;/**
  15.  * 读取html页面文件解析邮箱地址
  16.  *
  17.  * @author JavaAlpha 2012-12-19 13:45:11
  18.  */
  public class ReadHtmlToTxt {

    // Utility methods for pulling e-mail addresses out of HTML text, saving
    // them to a file, and dumping the content of a URL to standard output.

    /**
     * Matches a single e-mail address: a local part, an {@code @}, and a domain
     * made of at least two dot-separated labels. Compiled once because
     * {@link Pattern} instances are immutable and reusable.
     */
    private static final Pattern EMAIL_PATTERN =
        Pattern.compile("[A-Za-z0-9._%+-]+@[A-Za-z0-9-]+(?:\\.[A-Za-z0-9-]+)+");

    /** Matches one character in the range U+4E00..U+9FA5 (common Chinese ideographs). */
    private static final Pattern CHINESE_PATTERN = Pattern.compile("[\\u4e00-\\u9fa5]");

    /**
     * Reads an HTML file and extracts every e-mail address it contains.
     *
     * <p>The whole file is read before matching, so an address that happens to
     * straddle a read-buffer boundary is still found. The reader is always
     * closed, even when reading fails.
     *
     * @param path path of the HTML file to read
     * @return the addresses found, in document order, separated by the platform
     *     line separator; an empty string if {@code path} is {@code null}, the
     *     file is missing or unreadable, or no address is present
     */
    public static String readHtml(String path) {
      if (path == null) {
        return "";
      }
      File htmlFile = new File(path);
      if (!htmlFile.isFile() || !htmlFile.canRead()) {
        return "";
      }

      StringBuilder html = new StringBuilder();
      try (Reader in = new FileReader(htmlFile)) {
        char[] buff = new char[4096];
        int nch;
        while ((nch = in.read(buff, 0, buff.length)) != -1) {
          html.append(buff, 0, nch);
        }
      } catch (IOException e) {
        // Best effort: report the failure and still scan whatever was read.
        e.printStackTrace();
      }
      return joinEmails(html);
    }

    /**
     * Extracts the first e-mail address contained in a string.
     *
     * <p>The address is located with a regular expression rather than a fixed
     * window around the {@code @} sign, so short inputs no longer raise
     * {@link StringIndexOutOfBoundsException}, long addresses are not truncated,
     * and domains other than {@code .com} are recognised. When an address is
     * found it is also printed to standard output.
     *
     * @param str text that may contain an e-mail address; may be {@code null}
     * @return the first address found, or an empty string if there is none
     */
    public static String checkEmail(String str) {
      if (str == null) {
        return "";
      }
      Matcher matcher = EMAIL_PATTERN.matcher(str);
      if (!matcher.find()) {
        return "";
      }
      String address = matcher.group();
      System.out.println(address);
      return address;
    }

    /**
     * Tells whether a string contains at least one Chinese character.
     *
     * @param str the text to inspect; may be {@code null}
     * @return {@code true} if any character lies in U+4E00..U+9FA5,
     *     {@code false} otherwise (including for {@code null})
     */
    public static boolean checkChinese(String str) {
      return str != null && CHINESE_PATTERN.matcher(str).find();
    }

    /**
     * Writes the given text to a file, replacing any previous content.
     *
     * <p>The file is created if it does not exist. I/O failures are reported on
     * standard error and do not propagate. The writer is closed in every case.
     *
     * @param cont the text to write; {@code null} is written as an empty file
     * @param path path of the destination file
     */
    public static void writerFile(String cont, String path) {
      File emailFile = new File(path);
      // FileWriter creates the file when it is missing, so no explicit
      // createNewFile() call is needed.
      try (Writer out = new FileWriter(emailFile)) {
        out.write(cont == null ? "" : cont);
      } catch (IOException e) {
        e.printStackTrace();
      }
    }

    /**
     * Downloads the content behind a URL and prints it to standard output.
     *
     * <p>Every line read is followed by the literal {@code </br>} marker.
     * Failures (malformed URL, connection or read errors) are reported on
     * standard error; whatever was read before the failure is still printed.
     *
     * @param strUrl the URL to fetch
     */
    public static void readUrlCont(String strUrl) {
      StringBuilder cont = new StringBuilder();

      try {
        URLConnection conn = new URL(strUrl).openConnection();
        try (BufferedReader reader =
            new BufferedReader(new InputStreamReader(conn.getInputStream()))) {
          String lineCont;
          while ((lineCont = reader.readLine()) != null) {
            cont.append(lineCont).append("</br>");
          }
        }
      } catch (IOException e) {
        // MalformedURLException is an IOException, so this covers both cases.
        e.printStackTrace();
      }

      System.out.println(cont.toString());
    }

    /**
     * Demo entry point: prints the content of a fixed web page.
     *
     * <p>To process a local file instead, pass the result of
     * {@link #readHtml(String)} to {@link #writerFile(String, String)}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
      readUrlCont("http://www.163.com");
    }

    /** Collects every e-mail address in {@code text}, one per line, in order of appearance. */
    private static String joinEmails(CharSequence text) {
      StringBuilder found = new StringBuilder();
      Matcher matcher = EMAIL_PATTERN.matcher(text);
      while (matcher.find()) {
        if (found.length() > 0) {
          found.append(System.lineSeparator());
        }
        found.append(matcher.group());
      }
      return found.toString();
    }
  }
  112.  
  113. //java/5981

回复 "Java读取Html文本解析email地址的代码"

这儿你可以回复上面这条便签

captcha