库位码排序优化

库位码就是形如( A01-002-004 ) 的数据,前面一个大写字母,后面跟着一个整数。然后就是用 - 隔开再跟整数。上面说到的整数前面都可能有n个0填充 (排序时应该忽略)

比较规则:先比较第一个字母大小,然后依次比较后面的整数。

库位码比较器 (优化思路:1、避免产生新的字符串对象。2、不需要解析为整数) (下面程序解决了脏数据报错的问题)

import java.util.Comparator;


public class StoreLocalCodeComparator implements Comparator<String> {

    public char splitChar;

    public StoreLocalCodeComparator(char splitChar) {
        this.splitChar = splitChar;
    }

    /**
     * @param s1 非0开始的位置
     * @param e1 split 的位置
     * @param one
     * @param s2 非0开始的位置
     * @param e2 split 的位置
     * @param two
     * @return
     */
    private int partCompare(int s1, int e1, String one,
                        int s2, int e2, String two) {
        int i1 = e1 - s1;
        int i2 = e2 - s2;
        int i = i1 - i2;
        if(i == 0) {
            while(s1 < e1) {
                int t = one.charAt(s1) - two.charAt(s2);
                if(t != 0) {
                    return t;
                }
                s1++;
                s2++;
            }
            return 0;
        }
        return i;
    }

    private int findSplitChar(int start, String s) {
        int size = s.length();
        for(;start < size;start++) {
            if(s.charAt(start) == splitChar) {
                return start;
            }
        }
        return start;
    }

    private int findFirstNoZeroNumIndex(int start, String s) {
        int size = s.length();
        for(;start < size;start++) {
            char c = s.charAt(start);
            if(c > '0' && c <= '9') {
                break;
            }
        }
        return start;
    }

    @Override
    public int compare(String one, String two) {
        if(one == null) {
            if(two == null) {
                return 0;
            }
            return -1;
        }
        if(two == null) {
            return -1;
        }
        if("".equals(one)) {
            if("".equals(two)) {
                return 0;
            }
            return -1;
        }
        if("".equals(two)) {
            return -1;
        }
int c = one.charAt(0) - two.charAt(0); int s1, s2, e1 = 1, e2 = 1, l1 = one.length(), l2 = two.length(); boolean end = false; while(c == 0 && end == false) { s1 = findFirstNoZeroNumIndex(e1, one); s2 = findFirstNoZeroNumIndex(e2, two); if(s1 == l1 || s2 == l2) { end = true; } e1 = findSplitChar(s1, one); e2 = findSplitChar(s2, two); c = partCompare(s1, e1, one, s2, e2, two); } return c; } }

测试:(经过测试1百万的数据在内存中排序在2秒内,测试程序如下,感兴趣的可以测试一下)

import org.junit.Test;

import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ThreadLocalRandom;

public class SortTest {

    public List<String> initData(int size, boolean hasDirty) {
        List<String> list =  new ArrayList<>();
        for(int i = 0;i < size;i++) {
            list.add(randomCreateData(100, hasDirty));
        }
        return list;
    }

    private String dirtyData(int maxSize) {
        ThreadLocalRandom tlr = ThreadLocalRandom.current();
        int f = tlr.nextInt(2, maxSize);
        int i = tlr.nextInt(0, 6);
        if(i == 0) {
            return null;
        } else if(i == 1) {
            return "";
        } else if(i == 2) {
            return " ";
        } else if(i == 3) {
            String s = "";
            for(int j = 0;j < f;j++) {
                s += " ";
            }
            return s;
        } else if(i == 4) {
            return "" + (char)tlr.nextInt(33, 127);
        } else {
            String s = "";
            for (int j = 0; j < f; j++) {
                s += (char) tlr.nextInt(33, 127);
            }
            return s;
        }
    }

    private String randomCreateData(int numSize, boolean hasDirty) {
        ThreadLocalRandom tlr = ThreadLocalRandom.current();
        if(hasDirty && tlr.nextInt(100000) == 0) {
            return dirtyData(15);
        }
        StringBuilder sb = new StringBuilder();
        sb.append((char) tlr.nextInt(65, 91));
        addZero(sb, 3);
        sb.append(tlr.nextInt(1, numSize));
        sb.append('-');
        addZero(sb, 3);
        sb.append(tlr.nextInt(1, numSize));
        sb.append('-');
        addZero(sb, 3);
        sb.append(tlr.nextInt(1, numSize));
        return sb.toString();
    }

    /**
     * @param sb
     * @param maxSize
     */
    private void addZero(StringBuilder sb, int maxSize) {
        ThreadLocalRandom tlr = ThreadLocalRandom.current();
        int size = tlr.nextInt(0, maxSize);
        for(int i = 0;i <= size;i++) {
            sb.append('0');
        }
    }

    private void writeToFile(List<String> list, String filePath) throws IOException{
        try(FileWriter fw = new FileWriter(filePath)) {
            for (int i = 0; i < list.size(); i++) {
                fw.write(list.get(i) + "
");
            }
        }
    }

    /**
     * 将一百万数据排序后导出检查
     * @throws IOException
     */
    @Test
    public void test2() throws IOException {
        StoreLocalCodeComparator comparator = new StoreLocalCodeComparator('-');
        List<String> list = initData(1000000, false);
        long start = System.currentTimeMillis();
        list.sort(comparator);
        System.out.println("耗时: " + (System.currentTimeMillis() - start));
        String path = System.getProperty("java.io.tmpdir") + "test.txt";
        writeToFile(list, path);
        System.out.println(path);
    }

    /**
     * 连续10次测试一百万数据。
     * @throws IOException
     */
    @Test
    public void test() throws IOException {
        StoreLocalCodeComparator comparator = new StoreLocalCodeComparator('-');
        List<String> list = null;
        for(int i = 0;i < 10;i++) {
            try {
                list = initData(1000000, true);
                long start = System.currentTimeMillis();
                list.sort(comparator);
                System.out.println("耗时: " + (System.currentTimeMillis() - start));
            } catch (Exception e) {
                String path = System.getProperty("java.io.tmpdir") + "current_error.txt";
                writeToFile(list, path);
                System.out.println(path);
                e.printStackTrace();
            }
        }

        //通过前面50条数据校验
        int size = Math.min(50, list.size());
        for(int i = 0;i < size;i++) {
            System.out.println(list.get(i));
        }
    }

}
原文地址:https://www.cnblogs.com/math-and-it/p/15557439.html