随机生成华人姓名的Java工具类:权重 + 随机


package com.zhengyuxiao.toolbox.random;

import java.util.*;

/**
 * Random-selection utilities.
 *
 * @author xzy
 * @date 2020-12-24 11:55
 */
public class RandomUtils {

    /**
     * Shared generator. {@link Random} instances are thread-safe, so a single instance can serve
     * every caller instead of allocating a new one per draw.
     */
    private static final Random RANDOM = new Random();

    private RandomUtils() {
    }

    /**
     * Picks one element of {@code prizePool} at random, where the chance of picking element
     * {@code i} is {@code weightPool[i] / sum(weightPool)}. Implemented as a linear scan.
     *
     * <p>Notes:
     * <ol>
     *   <li>Time complexity is O(n), n = {@code prizePool.length}.</li>
     *   <li>Putting heavily weighted entries first reduces the number of entries scanned.</li>
     * </ol>
     *
     * @param prizePool  the "prize pool", i.e. the candidates to choose from
     * @param weightPool the weight of each candidate (same index as in {@code prizePool});
     *                   an entry with weight 0 is never chosen
     * @param <E>        the candidate type
     * @return the randomly drawn "prize"
     * @throws IllegalArgumentException if {@code prizePool} is empty, the two arrays differ in
     *                                  length, any weight is negative, all weights are zero, or
     *                                  the total weight does not fit in an {@code int}
     */
    public static <E> E randomDecisionWithWeight(E[] prizePool, int[] weightPool) {
        if (prizePool.length == 0 || prizePool.length != weightPool.length) {
            throw new IllegalArgumentException(
                    "prizePool must be non-empty and have the same length as weightPool");
        }

        /*
         * Prize pool and weights:
         *
         *      A      B           C                       D
         *   |-----|-------|---------------|-------------------------------|
         *    1/15    2/15        4/15                    8/15
         *
         *  Draw a random number x in [0, 15); the sub-interval x falls into decides the prize:
         *                                     x
         *  |----------------------------------|
         */

        // Sum the weights to get the range of the random number. Accumulate in a long so that a
        // sum above Integer.MAX_VALUE is detected instead of silently wrapping around.
        long weightSum = 0;
        for (int weight : weightPool) {
            if (weight < 0) {
                throw new IllegalArgumentException("权重不允许是负数!");
            }
            weightSum += weight;
        }
        if (weightSum == 0) {
            throw new IllegalArgumentException("at least one weight must be positive");
        }
        if (weightSum > Integer.MAX_VALUE) {
            throw new IllegalArgumentException(
                    "total weight exceeds Integer.MAX_VALUE: " + weightSum);
        }

        // Draw a point in [0, weightSum) and walk the intervals until the point falls inside one.
        int randomPrizePoint = RANDOM.nextInt((int) weightSum);
        for (int i = 0; i < weightPool.length; i++) {
            if (randomPrizePoint < weightPool[i]) {
                return prizePool[i];
            }
            randomPrizePoint -= weightPool[i];
        }

        // The point is strictly below the total weight, so the scan above always returns.
        throw new IllegalStateException("weighted scan finished without selecting an element");
    }

    /**
     * Demo: draws 1000 prizes with weights 1:2:4:8, printing every draw and a final tally.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String[] prizePool = new String[]{"一等奖", "二等奖", "三等奖", "鼓励奖"};
        int[] prizeWeight = new int[]{1, 2, 4, 8};

        Map<String, Integer> count = new HashMap<>(4);
        for (int i = 0; i < 1000; i++) {
            String randomPrize = randomDecisionWithWeight(prizePool, prizeWeight);
            count.merge(randomPrize, 1, Integer::sum);
            System.out.println(randomPrize);
        }

        System.out.println("统计:" + count);
    }
}
package com.zhengyuxiao.toolbox.random;

import java.nio.charset.Charset;
import java.util.Random;

/**
 * Chinese-text helpers: random Chinese characters, surnames and full names.
 *
 * @author xzy
 * @date 2021/10/21 13:49
 */
public class ChineseUtil {
    /**
     * Chinese surnames, single- and double-character. The original author describes the list as
     * ordered from most to least used.
     *
     * <p>NOTE(review): the sequence looks like the traditional "Hundred Family Surnames" ordering
     * rather than measured population frequency - confirm before relying on the order.
     *
     * <p>The array is public and therefore mutable; callers must not modify it.
     */
    public static final String[] LAST_NAME = new String[]{
            "赵", "钱", "孙", "李", "周", "吴", "郑", "王", "冯", "陈", "褚", "卫", "蒋", "沈", "韩", "杨", "朱", "秦", "尤",
            "许", "何", "吕", "施", "张", "孔", "曹", "严", "华", "金", "魏", "陶", "姜", "戚", "谢", "邹", "喻", "柏", "水",
            "窦", "章", "云", "苏", "潘", "葛", "奚", "范", "彭", "郎", "鲁", "韦", "昌", "马", "苗", "凤", "花", "方", "俞",
            "任", "袁", "柳", "酆", "鲍", "史", "唐", "费", "廉", "岑", "薛", "雷", "贺", "倪", "汤", "滕", "殷", "罗", "毕",
            "郝", "邬", "安", "常", "于", "时", "傅", "皮", "卞", "齐", "康", "伍", "余", "元", "卜", "顾", "孟", "平", "黄",
            "和", "穆", "萧", "尹", "姚", "邵", "湛", "汪", "祁", "毛", "禹", "狄", "米", "贝", "明", "臧", "计", "伏", "成",
            "戴", "谈", "宋", "茅", "庞", "熊", "纪", "舒", "屈", "项", "祝", "董", "梁", "杜", "阮", "蓝", "闵", "席", "季",
            "麻", "强", "贾", "路", "娄", "危", "江", "童", "颜", "郭", "梅", "盛", "林", "刁", "钟", "徐", "邱", "骆", "高",
            "夏", "蔡", "田", "樊", "胡", "凌", "霍", "虞", "万", "支", "柯", "昝", "管", "卢", "莫", "经", "房", "裘", "缪",
            "干", "解", "应", "宗", "丁", "宣", "贲", "邓", "郁", "单", "杭", "洪", "包", "诸", "左", "石", "崔", "吉", "钮",
            "龚", "程", "嵇", "邢", "滑", "裴", "陆", "荣", "翁", "荀", "羊", "於", "惠", "甄", "曲", "家", "封", "芮", "羿",
            "储", "靳", "汲", "邴", "糜", "松", "井", "段", "富", "巫", "乌", "焦", "巴", "弓", "牧", "隗", "山", "谷", "车",
            "侯", "宓", "蓬", "全", "郗", "班", "仰", "秋", "仲", "伊", "宫", "宁", "仇", "栾", "暴", "甘", "钭", "厉", "戎",
            "祖", "武", "符", "刘", "景", "詹", "束", "龙", "叶", "幸", "司", "韶", "郜", "黎", "蓟", "薄", "印", "宿", "白",
            "怀", "蒲", "台", "从", "鄂", "索", "咸", "籍", "赖", "卓", "蔺", "屠", "蒙", "池", "乔", "阴", "欎", "胥", "能",
            "苍", "双", "闻", "莘", "党", "翟", "谭", "贡", "劳", "逄", "姬", "申", "扶", "堵", "冉", "宰", "郦", "雍", "郤",
            "璩", "桑", "桂", "濮", "牛", "寿", "通", "边", "扈", "燕", "冀", "郏", "浦", "尚", "农", "温", "别", "庄", "晏",
            "柴", "瞿", "阎", "充", "慕", "连", "茹", "习", "宦", "艾", "鱼", "容", "向", "古", "易", "慎", "戈", "廖", "庾",
            "终", "暨", "居", "衡", "步", "都", "耿", "满", "弘", "匡", "国", "文", "寇", "广", "禄", "阙", "东", "殴", "殳",
            "沃", "利", "蔚", "越", "夔", "隆", "师", "巩", "厍", "聂", "晁", "勾", "敖", "融", "冷", "訾", "辛", "阚", "那",
            "简", "饶", "空", "曾", "毋", "沙", "乜", "养", "鞠", "须", "丰", "巢", "关", "蒯", "相", "查", "后", "荆", "红",
            "游", "竺", "权", "逯", "盖", "益", "桓", "公", "万俟", "司马", "上官", "欧阳", "夏侯", "诸葛", "闻人", "东方", "赫连",
            "皇甫", "尉迟", "公羊", "澹台", "公冶", "宗政", "濮阳", "淳于", "单于", "太叔", "申屠", "公孙", "仲孙", "轩辕", "令狐",
            "钟离", "宇文", "长孙", "慕容", "鲜于", "闾丘", "司徒", "司空", "亓官", "司寇", "仉", "督", "子车", "颛孙", "端木", "巫马",
            "公西", "漆雕", "乐正", "壤驷", "公良", "拓跋", "夹谷", "宰父", "谷梁", "晋", "楚", "闫", "法", "汝", "鄢", "涂", "钦",
            "段干", "百里", "东郭", "南", "门", "呼延", "归海", "羊舌", "微生", "岳", "帅", "缑", "亢", "况", "郈", "有", "琴", "梁丘",
            "左丘", "东门", "西门", "商", "牟", "佘", "佴", "伯", "赏", "南宫", "墨", "哈", "谯", "笪", "年", "爱", "阳", "佟", "第五",
            "言", "福", "百", "姓"
    };

    /**
     * Weight of each surname, index-aligned with {@link #LAST_NAME} (a larger value means the
     * surname is drawn more often). The values simply count down from 505 to 1 by list position;
     * they are not real-world frequencies.
     *
     * <pre>
     * A                  B             C       D    E  F
     * |--------------------|---------------|----------|-----|---|-|
     * </pre>
     *
     * <p>The array is public and therefore mutable; callers must not modify it.
     */
    public static final int[] LAST_NAME_WEIGHTS = new int[]{
            505, 504, 503, 502, 501, 500, 499, 498, 497, 496, 495, 494, 493, 492, 491, 490, 489, 488, 487, 486, 485,
            484, 483, 482, 481, 480, 479, 478, 477, 476, 475, 474, 473, 472, 471, 470, 469, 468, 467, 466, 465, 464,
            463, 462, 461, 460, 459, 458, 457, 456, 455, 454, 453, 452, 451, 450, 449, 448, 447, 446, 445, 444, 443,
            442, 441, 440, 439, 438, 437, 436, 435, 434, 433, 432, 431, 430, 429, 428, 427, 426, 425, 424, 423, 422,
            421, 420, 419, 418, 417, 416, 415, 414, 413, 412, 411, 410, 409, 408, 407, 406, 405, 404, 403, 402, 401,
            400, 399, 398, 397, 396, 395, 394, 393, 392, 391, 390, 389, 388, 387, 386, 385, 384, 383, 382, 381, 380,
            379, 378, 377, 376, 375, 374, 373, 372, 371, 370, 369, 368, 367, 366, 365, 364, 363, 362, 361, 360, 359,
            358, 357, 356, 355, 354, 353, 352, 351, 350, 349, 348, 347, 346, 345, 344, 343, 342, 341, 340, 339, 338,
            337, 336, 335, 334, 333, 332, 331, 330, 329, 328, 327, 326, 325, 324, 323, 322, 321, 320, 319, 318, 317,
            316, 315, 314, 313, 312, 311, 310, 309, 308, 307, 306, 305, 304, 303, 302, 301, 300, 299, 298, 297, 296,
            295, 294, 293, 292, 291, 290, 289, 288, 287, 286, 285, 284, 283, 282, 281, 280, 279, 278, 277, 276, 275,
            274, 273, 272, 271, 270, 269, 268, 267, 266, 265, 264, 263, 262, 261, 260, 259, 258, 257, 256, 255, 254,
            253, 252, 251, 250, 249, 248, 247, 246, 245, 244, 243, 242, 241, 240, 239, 238, 237, 236, 235, 234, 233,
            232, 231, 230, 229, 228, 227, 226, 225, 224, 223, 222, 221, 220, 219, 218, 217, 216, 215, 214, 213, 212,
            211, 210, 209, 208, 207, 206, 205, 204, 203, 202, 201, 200, 199, 198, 197, 196, 195, 194, 193, 192, 191,
            190, 189, 188, 187, 186, 185, 184, 183, 182, 181, 180, 179, 178, 177, 176, 175, 174, 173, 172, 171, 170,
            169, 168, 167, 166, 165, 164, 163, 162, 161, 160, 159, 158, 157, 156, 155, 154, 153, 152, 151, 150, 149,
            148, 147, 146, 145, 144, 143, 142, 141, 140, 139, 138, 137, 136, 135, 134, 133, 132, 131, 130, 129, 128,
            127, 126, 125, 124, 123, 122, 121, 120, 119, 118, 117, 116, 115, 114, 113, 112, 111, 110, 109, 108, 107,
            106, 105, 104, 103, 102, 101, 100, 99, 98, 97, 96, 95, 94, 93, 92, 91, 90, 89, 88, 87, 86, 85, 84, 83, 82,
            81, 80, 79, 78, 77, 76, 75, 74, 73, 72, 71, 70, 69, 68, 67, 66, 65, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55,
            54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28,
            27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1
    };

    /** First lead byte used for random characters (0xB0). */
    private static final int FIRST_LEAD_BYTE = 176;

    /** Number of consecutive lead bytes used, i.e. 0xB0-0xD6. */
    private static final int LEAD_BYTE_COUNT = 39;

    /** First trail byte used for random characters (0xA1, column 1 plus the 0xA0 offset). */
    private static final int FIRST_TRAIL_BYTE = 161;

    /** Number of columns per row, so trail bytes span 0xA1-0xFE. */
    private static final int TRAIL_BYTE_COUNT = 94;

    /**
     * Shared generator. {@link Random} instances are thread-safe, so one instance serves every
     * method instead of allocating a new generator per call.
     */
    private static final Random RANDOM = new Random();

    /**
     * Returns a randomly generated Chinese character.
     *
     * @return a one-character string decoded from a random two-byte GBK code
     */
    public static String getRandomChineseChar() {
        /*
         * A Chinese character is stored in two bytes (the "zone/position code"): the high byte is
         * the zone (row) and the low byte is the position (column). Picture a 94 x 94 table: the
         * zone selects the row and the position selects the column, much like a two-dimensional
         * array. GB2312 is laid out roughly this way: zones 1-9 hold symbols, zones 16-55 hold
         * level-1 characters, zones 56-87 hold level-2 characters, the rest is unused. To tell
         * Chinese bytes apart from ASCII (whose top bit is 0), GB2312 adds 0xA0 to both bytes.
         *
         * The lead byte is drawn from 0xB0-0xD6, a subset of the level-1 zones, exactly as before.
         * The trail byte is drawn from all 94 columns, 0xA1-0xFE; the previous bound of 93 could
         * never produce 0xFE.
         */

        // Random zone byte and position byte. nextInt(bound) is already non-negative.
        int highPos = FIRST_LEAD_BYTE + RANDOM.nextInt(LEAD_BYTE_COUNT);
        int lowPos = FIRST_TRAIL_BYTE + RANDOM.nextInt(TRAIL_BYTE_COUNT);

        // Narrowing to byte keeps the low 8 bits, which is the encoded value we need.
        byte[] encoded = {(byte) highPos, (byte) lowPos};

        // Decode the two bytes into the character.
        return new String(encoded, Charset.forName("GBK"));
    }

    /**
     * Returns a random Chinese surname from {@link #LAST_NAME}.
     *
     * @param absolutelyFair {@code true}: every surname is equally likely;
     *                       {@code false}: surnames are drawn according to
     *                       {@link #LAST_NAME_WEIGHTS}
     * @return a random Chinese surname
     */
    public static String getRandomChineseLastName(boolean absolutelyFair) {
        if (absolutelyFair) {
            // Uniform draw. nextInt's bound is exclusive, so the bound must be the full length;
            // using length - 1 would make the last surname unreachable.
            return LAST_NAME[RANDOM.nextInt(LAST_NAME.length)];
        } else {
            // Weighted draw: surnames with a larger weight are picked more often.
            return RandomUtils.randomDecisionWithWeight(LAST_NAME, LAST_NAME_WEIGHTS);
        }
    }

    /**
     * Returns a random Chinese full name: a weighted-random surname followed by a given name of
     * one or two random characters.
     *
     * @return a random Chinese full name
     */
    public static String getRandomChineseName() {
        // Surname, weighted draw.
        String lastName = getRandomChineseLastName(false);

        // Given name: 8 out of 10 names get two characters, the rest get one.
        boolean twoCharacters = RANDOM.nextInt(10) < 8;
        String firstName = twoCharacters
                ? getRandomChineseChar() + getRandomChineseChar()
                : getRandomChineseChar();

        // Full name: surname + given name.
        return lastName + firstName;
    }

    /**
     * Demo: prints 1000 random names.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        for (int i = 0; i < 1000; i++) {
            System.out.println(getRandomChineseName());
        }
    }
}

待优化:没有根据不同姓氏的实际使用频率设置权重