
在工程中遇到这么一个问题,有两个升序集合A和B,a是A中的元素,b是B中的元素,希望求解a与b之差绝对值(abs(a-b))的最小值。
二分
朴素算法复杂度是O(n*n),二分算法O(n*logn),又因为两个集合都是升序的,所以可以优化到小于O(n*logn)。用Java实现如下:
package com.hankcs.test.algorithm;
import java.util.Arrays;
import java.util.TreeSet;
/**
* 求两个集合中最相近的两个数
*
* @author hankcs
*/
public class BinarySearch
{
public static void main(String[] args)
{
TreeSet<Integer> setA = new TreeSet<>();
setA.add(5);
TreeSet<Integer> setB = new TreeSet<>();
setB.add(1);
setB.add(2);
setB.add(3);
setB.add(8);
setB.add(16);
System.out.println(computeMinimumDistance(setA, setB));
}
public static Integer computeMinimumDistance(TreeSet<Integer> setA, TreeSet<Integer> setB)
{
Integer[] arrayA = setA.toArray(new Integer[0]);
Integer[] arrayB = setB.toArray(new Integer[0]);
int startIndex = 0;
Integer min_distance = Integer.MAX_VALUE;
for (int i = 0; i < arrayA.length; ++i)
{
startIndex = Arrays.binarySearch(arrayB, startIndex, arrayB.length, arrayA[i]);
if (startIndex < 0)
{
startIndex = -startIndex - 1;
if (startIndex - 1 >= 0)
{
min_distance = Math.min(min_distance, arrayA[i] - arrayB[startIndex - 1]);
}
if (startIndex < arrayB.length)
{
min_distance = Math.min(min_distance, arrayB[startIndex] - arrayA[i]);
}
}
else return 0;
}
return min_distance;
}
}
事实上,第二层循环中的复杂度并不是logn,因为二分的范围越来越小,实际复杂度应该小于O(n*logn),具体多少,我不知道,如果你知道,欢迎指教。
一种O(n)时间的解法
public static Integer computeMinimumDistanceB(Integer[] a, Integer[] b)
{
int aindex = 0;
int bindex = 0;
int min = Math.abs(a[0] - b[0]);
while (true)
{
if (a[aindex] > b[bindex])
{
bindex++;
}
else
{
aindex++;
}
if (aindex >= a.length || bindex >= b.length)
{
break;
}
if (Math.abs(a[aindex] - b[bindex]) < min)
{
min = Math.abs(a[aindex] - b[bindex]);
}
}
return min;
}
感谢Go_Wizard的指点。
两种方法比较
经过随机测试,第二种方法的确快很多。
package com.hankcs;
import java.util.Arrays;
import java.util.Random;
import java.util.TreeSet;
public class Main
{
public static void main(String[] args)
{
Random random = new Random(System.currentTimeMillis());
long timeA = 0;
long timeB = 0;
for (int _ = 0; _ < 1000; ++_)
{
TreeSet<Integer> setA = new TreeSet<Integer>();
int sizeA = random.nextInt(1000) + 1;
while (sizeA-- > 0)
{
setA.add(random.nextInt());
}
TreeSet<Integer> setB = new TreeSet<Integer>();
int sizeB = random.nextInt(1000) + 1;
while (sizeB-- > 0)
{
setB.add(random.nextInt());
}
Integer[] arrayA = setA.toArray(new Integer[0]);
Integer[] arrayB = setB.toArray(new Integer[0]);
long start = System.currentTimeMillis();
int resultA = computeMinimumDistanceA(arrayA, arrayB);
timeA += System.currentTimeMillis() - start;
start = System.currentTimeMillis();
int resultB = computeMinimumDistanceB(arrayA, arrayB);
timeB += System.currentTimeMillis() - start;
if (resultA != resultB)
{
System.out.printf("A:%d B:%d %s %s\n", resultA, resultB, Arrays.toString(arrayA), Arrays.toString(arrayB));
}
}
System.out.printf("A用时%d B用时%d\n", timeA, timeB);
}
public static Integer computeMinimumDistanceA(Integer[] arrayA, Integer[] arrayB)
{
int startIndex = 0;
Integer min_distance = Integer.MAX_VALUE;
for (int i = 0; i < arrayA.length; ++i)
{
startIndex = Arrays.binarySearch(arrayB, startIndex, arrayB.length, arrayA[i]);
if (startIndex < 0)
{
startIndex = -startIndex - 1;
if (startIndex - 1 >= 0)
{
min_distance = Math.min(min_distance, Math.abs(arrayA[i] - arrayB[startIndex - 1]));
}
if (startIndex < arrayB.length)
{
min_distance = Math.min(min_distance, Math.abs(arrayB[startIndex] - arrayA[i]));
}
}
else return 0;
}
return min_distance;
}
public static Integer computeMinimumDistanceB(Integer[] a, Integer[] b)
{
int aindex = 0;
int bindex = 0;
int min = Math.abs(a[0] - b[0]);
while (true)
{
if (a[aindex] > b[bindex])
{
bindex++;
}
else
{
aindex++;
}
if (aindex >= a.length || bindex >= b.length)
{
break;
}
if (Math.abs(a[aindex] - b[bindex]) < min)
{
min = Math.abs(a[aindex] - b[bindex]);
}
}
return min;
}
}
输出
A用时36 B用时11
码农场
为什么要使用二分法去查找呢?应该在O(nA+nB)的可以实现的
利用有序性,求教O(nA+nB)
// ArrayComp.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
#include "cmath"
int _tmain(int argc, _TCHAR* argv[])
{
int a[10] = {1,3,5,7,9,11,13,15,17,19};
int b[10] = {2,4,6,8,9,12,14,16,18,20};
int aminindex = 0;
int bminindex = 0;
int aindex = 0;
int bindex = 0;
int min = abs(a[0] – b[0]);
while(1)
{
if(a[aindex]> b[bindex])
{
bindex ++;
}
else
{
aindex++;
}
if(aindex > 9 || bindex>9)
{
break;
}
if(abs(a[aindex]-b[bindex]) < min)
{
min =abs(a[aindex]-b[bindex]);
aminindex = aindex;
bminindex = bindex;
}
}
printf("aindex=%d,bindex=%d",aminindex,bminindex);
return 0;
}
感谢指点,你是对的,的确不用二分也行。
最烦算法了,以后肯定不能靠这个混