方法一、网络爬取
package com.qcqc.javaStudy;
import java.io.*;
import java.net.URL;
import java.net.URLConnection;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Builds a file of random student records in the form {@code name-gender-age}.
 *
 * <p>Surnames and given names are scraped from three web pages, combined at random into
 * full names, paired with a random age, shuffled, printed, and written to
 * {@code RandomName.txt} (one record per line).
 */
public class RandomName {

    /** Charset used when a page's Content-Type header names none, and for the output file. */
    private static final String DEFAULT_CHARSET = "UTF-8";

    public static void main(String[] args) throws IOException {
        // 1. Pages that hold the raw name data.
        String familyNameNet = "https://hanyu.baidu.com/shici/detail?pid=0b2f26d4c0ddb3ee693fdb1137ee1b0d";
        String boyNameNet = "http://www.haoming8.cn/baobao/10881.html";
        String girlNameNet = "http://www.haoming8.cn/baobao/7641.html";

        // 2. Download each page as a single string.
        String familyNameStr = webCrawler(familyNameNet);
        String boyNameStr = webCrawler(boyNameNet);
        String girlNameStr = webCrawler(girlNameNet);

        // 3. Pull the interesting fragments out with regular expressions.
        // NOTE(review): the first pattern matches an ASCII comma; if the page separates the
        // four-character groups with a full-width comma, only groups followed by '。' match - confirm.
        ArrayList<String> familyNameTempList = getData(familyNameStr,"(.{4})(,|。)",1);
        ArrayList<String> boyNameTempList = getData(boyNameStr,"([\\u4E00-\\u9FA5]{2})(、|。)",1);
        ArrayList<String> girlNameTempList = getData(girlNameStr,"(.. ){4}..",0);

        // 4. Post-process the fragments.
        // Surnames: every character of a matched group is treated as one surname.
        ArrayList<String> familyNameList = new ArrayList<>();
        for (String group : familyNameTempList) {
            for (int i = 0; i < group.length(); i++) {
                familyNameList.add(String.valueOf(group.charAt(i)));
            }
        }
        // Boys' names: drop duplicates while keeping first-seen order.
        ArrayList<String> boyNameList = new ArrayList<>(new LinkedHashSet<String>(boyNameTempList));
        // Girls' names: each match holds several names separated by single spaces.
        ArrayList<String> girlNameList = new ArrayList<>();
        for (String line : girlNameTempList) {
            girlNameList.addAll(Arrays.asList(line.split(" ")));
        }

        // 5. Generate the records: name (unique per gender) - gender - age.
        ArrayList<String> infos = getinfos(familyNameList, boyNameList, girlNameList, 700, 700);
        Collections.shuffle(infos);
        System.out.println(infos);

        // 6. Write the records out. The charset is explicit so the file does not depend on the
        // platform default, and try-with-resources closes the file even if a write fails.
        try (BufferedWriter bw = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream("RandomName.txt"), DEFAULT_CHARSET))) {
            for (String info : infos) {
                bw.write(info);
                bw.newLine();
            }
        }
    }

    /**
     * Creates {@code boyCount} boy records followed by {@code girlCount} girl records.
     *
     * <p>Each record is {@code fullName-gender-age}. Full names are a surname followed by a
     * given name and are unique within each gender. Boys get an age in 18..27, girls in 18..25.
     * The input lists are only read, never reordered.
     *
     * @param familyNameList surnames to draw from
     * @param boyNameList    given names for boys
     * @param girlNameList   given names for girls
     * @param boyCount       number of boy records wanted; values {@code <= 0} yield none
     * @param girlCount      number of girl records wanted; values {@code <= 0} yield none
     * @return the records, boys first, then girls
     * @throws IllegalArgumentException if the lists cannot produce that many distinct full names
     */
    public static ArrayList<String> getinfos(ArrayList<String> familyNameList, ArrayList<String> boyNameList, ArrayList<String> girlNameList ,int boyCount,int girlCount){
        Random random = new Random();
        List<String> boyNames = pickUniqueNames(familyNameList, boyNameList, boyCount, random);
        List<String> girlNames = pickUniqueNames(familyNameList, girlNameList, girlCount, random);

        ArrayList<String> infos = new ArrayList<>(boyNames.size() + girlNames.size());
        // Boys: age 18-27.
        for (String boyName : boyNames) {
            infos.add(boyName + "-男-" + (random.nextInt(10) + 18));
        }
        // Girls: age 18-25.
        for (String girlName : girlNames) {
            infos.add(girlName + "-女-" + (random.nextInt(8) + 18));
        }
        return infos;
    }

    /**
     * Picks {@code count} distinct surname+given-name strings in random order.
     *
     * <p>All distinct combinations are enumerated first (memory proportional to
     * surnames x given names) so that an unsatisfiable request is rejected up front
     * instead of retrying forever.
     */
    private static List<String> pickUniqueNames(List<String> familyNames, List<String> givenNames,
                                                int count, Random random) {
        if (count <= 0) {
            return new ArrayList<>();
        }
        Set<String> distinct = new LinkedHashSet<>();
        for (String familyName : familyNames) {
            for (String givenName : givenNames) {
                distinct.add(familyName + givenName);
            }
        }
        if (distinct.size() < count) {
            throw new IllegalArgumentException("Cannot build " + count + " unique names: only "
                    + distinct.size() + " distinct surname/given-name combinations are available");
        }
        List<String> pool = new ArrayList<>(distinct);
        Collections.shuffle(pool, random);
        return new ArrayList<>(pool.subList(0, count));
    }

    /**
     * Collects one capture group from every match of a regular expression.
     *
     * @param str   text to search
     * @param regex regular expression to apply
     * @param index capture group to collect from each match (0 = the whole match)
     * @return the collected group values in match order; empty when nothing matches
     */
    private static ArrayList<String> getData(String str, String regex,int index) {
        ArrayList<String> list = new ArrayList<>();
        Matcher matcher = Pattern.compile(regex).matcher(str);
        while (matcher.find()) {
            list.add(matcher.group(index));
        }
        return list;
    }

    /**
     * Downloads the resource at the given URL and returns its body as text.
     *
     * <p>The body is decoded with the charset named in the response's Content-Type header,
     * or UTF-8 when the header names none, rather than the platform default.
     *
     * @param net URL to fetch
     * @return the decoded response body
     * @throws IOException if the URL is malformed, the transfer fails, or the declared
     *                     charset is not supported by this JVM
     */
    public static String webCrawler(String net) throws IOException {
        URLConnection conn = new URL(net).openConnection();
        StringBuilder sb = new StringBuilder();
        // The stream is opened first so a connection failure surfaces here; both resources
        // are closed even when decoding or reading throws.
        try (InputStream in = conn.getInputStream();
             Reader reader = new InputStreamReader(in, charsetOf(conn.getContentType()))) {
            char[] buffer = new char[8192];
            int read;
            while ((read = reader.read(buffer)) != -1) {
                sb.append(buffer, 0, read);
            }
        }
        return sb.toString();
    }

    /** Extracts the {@code charset=} parameter from a Content-Type value, defaulting to UTF-8. */
    private static String charsetOf(String contentType) {
        if (contentType != null) {
            for (String part : contentType.split(";")) {
                String token = part.trim();
                if (token.regionMatches(true, 0, "charset=", 0, 8)) {
                    String name = token.substring(8).trim();
                    // The parameter value may be quoted.
                    if (name.length() >= 2 && name.startsWith("\"") && name.endsWith("\"")) {
                        name = name.substring(1, name.length() - 1);
                    }
                    if (!name.isEmpty()) {
                        return name;
                    }
                }
            }
        }
        return DEFAULT_CHARSET;
    }
}
package com.qcqc.javaStudy;
import cn.hutool.core.io.FileUtil;
import cn.hutool.core.util.ReUtil;
import cn.hutool.http.HttpUtil;
import java.util.*;
/**
 * Hutool-based variant of the random student record generator.
 *
 * <p>Surnames and given names are fetched with {@code HttpUtil}, extracted with
 * {@code ReUtil}, combined at random into {@code name-gender-age} records, shuffled,
 * printed, and written to {@code RandomNameWithHutool.txt} with {@code FileUtil}.
 */
public class RandomNameWithHutool {
    public static void main(String[] args) {
        // 1. Pages that hold the raw name data.
        String familyNameNet = "https://hanyu.baidu.com/shici/detail?pid=0b2f26d4c0ddb3ee693fdb1137ee1b0d";
        String boyNameNet = "http://www.haoming8.cn/baobao/10881.html";
        String girlNameNet = "http://www.haoming8.cn/baobao/7641.html";

        // 2. Download each page.
        String familyNameStr = HttpUtil.get(familyNameNet);
        String boyNameStr = HttpUtil.get(boyNameNet);
        String girlNameStr = HttpUtil.get(girlNameNet);

        // 3. Pull the interesting fragments out with regular expressions.
        // NOTE(review): the first pattern matches an ASCII comma; if the page separates the
        // four-character groups with a full-width comma, only groups followed by '。' match - confirm.
        List<String> familyNameTempList = ReUtil.findAll( "(.{4})(,|。)",familyNameStr, 1);
        List<String> boyNameTempList = ReUtil.findAll( "([\\u4E00-\\u9FA5]{2})(、|。)",boyNameStr, 1);
        List<String> girlNameTempList = ReUtil.findAll( "(.. ){4}..",girlNameStr, 0);

        // 4. Post-process the fragments.
        // Surnames: every character of a matched group is treated as one surname.
        ArrayList<String> familyNameList = new ArrayList<>();
        for (String group : familyNameTempList) {
            for (int i = 0; i < group.length(); i++) {
                familyNameList.add(String.valueOf(group.charAt(i)));
            }
        }
        // Boys' names: drop duplicates while keeping first-seen order.
        ArrayList<String> boyNameList = new ArrayList<>(new LinkedHashSet<String>(boyNameTempList));
        // Girls' names: each match holds several names separated by single spaces.
        ArrayList<String> girlNameList = new ArrayList<>();
        for (String line : girlNameTempList) {
            girlNameList.addAll(Arrays.asList(line.split(" ")));
        }

        // 5. Generate the records: name (unique per gender) - gender - age.
        ArrayList<String> infos = getinfos(familyNameList, boyNameList, girlNameList, 700, 700);
        Collections.shuffle(infos);
        System.out.println(infos);

        // 6. Write the records out, one per line.
        FileUtil.writeLines(infos,"RandomNameWithHutool.txt","UTF-8");
    }

    /**
     * Creates {@code boyCount} boy records followed by {@code girlCount} girl records.
     *
     * <p>Each record is {@code fullName-gender-age}. Full names are a surname followed by a
     * given name and are unique within each gender. Boys get an age in 18..27, girls in 18..25.
     * The input lists are only read, never reordered.
     *
     * @param familyNameList surnames to draw from
     * @param boyNameList    given names for boys
     * @param girlNameList   given names for girls
     * @param boyCount       number of boy records wanted; values {@code <= 0} yield none
     * @param girlCount      number of girl records wanted; values {@code <= 0} yield none
     * @return the records, boys first, then girls
     * @throws IllegalArgumentException if the lists cannot produce that many distinct full names
     */
    public static ArrayList<String> getinfos(ArrayList<String> familyNameList, ArrayList<String> boyNameList, ArrayList<String> girlNameList ,int boyCount,int girlCount){
        Random random = new Random();
        List<String> boyNames = pickUniqueNames(familyNameList, boyNameList, boyCount, random);
        List<String> girlNames = pickUniqueNames(familyNameList, girlNameList, girlCount, random);

        ArrayList<String> infos = new ArrayList<>(boyNames.size() + girlNames.size());
        // Boys: age 18-27.
        for (String boyName : boyNames) {
            infos.add(boyName + "-男-" + (random.nextInt(10) + 18));
        }
        // Girls: age 18-25.
        for (String girlName : girlNames) {
            infos.add(girlName + "-女-" + (random.nextInt(8) + 18));
        }
        return infos;
    }

    /**
     * Picks {@code count} distinct surname+given-name strings in random order.
     *
     * <p>All distinct combinations are enumerated first (memory proportional to
     * surnames x given names) so that an unsatisfiable request is rejected up front
     * instead of retrying forever.
     */
    private static List<String> pickUniqueNames(List<String> familyNames, List<String> givenNames,
                                                int count, Random random) {
        if (count <= 0) {
            return new ArrayList<>();
        }
        Set<String> distinct = new LinkedHashSet<>();
        for (String familyName : familyNames) {
            for (String givenName : givenNames) {
                distinct.add(familyName + givenName);
            }
        }
        if (distinct.size() < count) {
            throw new IllegalArgumentException("Cannot build " + count + " unique names: only "
                    + distinct.size() + " distinct surname/given-name combinations are available");
        }
        List<String> pool = new ArrayList<>(distinct);
        Collections.shuffle(pool, random);
        return new ArrayList<>(pool.subList(0, count));
    }
}