关于一个运算优化的问题

litaoye · 发表于 2009-3-20 04:05:19

我也贴个C#，看看对大家的思路有没有帮助，这道题的平均水平都被大家带高了！呵呵！

class Program1318Xor
{
class BigIntCompare : IComparer<BigInt>
{
public int Compare(BigInt x, BigInt y)
{
for (int i = 3; i >= 0; i--)
{
if (x.Items[i] > y.Items[i])
return 1;
else if ((x.Items[i] < y.Items[i]))
return -1;
}
return 0;
}
}
class BigInt
{
public uint[] Items;
public BigInt(string str)
{
string[] item = str.Split(' ');
Items = new uint[4];
for (int i = 0; i < 4; i++)
Items[3 - i] = uint.Parse(item[i]);
}
public BigInt()
{
Items = new uint[4];
}
public BigInt(uint A)
{
Items = new uint[4];
Items[0] = A;
}
public bool BitNum(int index)
{
int bit = index >> 5;
int current = index - (bit << 5);
return ((Items[bit] >> current) & 1) == 1;
}
public int FirstIndexOfOne()
{
for (int i = 3; i >= 0; i--)
{
if (Items[i] == 0)
continue;
for (int j = 31; j >= 0; j--)
{
if (((Items[i] >> j) & 1) == 1)
return (j + (i << 5));
}
}
return 0;
}
public int IndexOfOne(int start)
{
while (start > 0 && !BitNum(start))
start--;
return start;
}
public static BigInt operator *(BigInt A, uint B)
{
BigInt result = new BigInt();
ulong mTemp = 0;
uint cTemp = 0;
for (int i = 0; i < 4; i++)
{
mTemp = (ulong)A.Items[i] * (ulong)B;
result.Items[i] = (uint)(mTemp & 0xFFFFFFFF);
result.Items[i] += cTemp;
cTemp = (uint)(mTemp >> 32);
}
if (cTemp != 0)
return null;
return result;
}
}
private static List<BigInt> AllNumber;
private static List<int> AllCount;
private static List<int> AllAccumlation;
private static List<BigInt> PowerOf10;
private static int[] PowerOf10Log;
static void solve()
{
preOperation();
AllNumber = new List<BigInt>();
AllCount = new List<int>();
AllAccumlation = new List<int>();
int N = int.Parse(Console.ReadLine());
//int N = 100;
BigInt bInt;
for (int i = 0; i < N; i++)
{
bInt = new BigInt(Console.ReadLine());
//bInt = new BigInt((uint)i);
AllNumber.Add(bInt);
}
AllNumber.Sort(new BigIntCompare().Compare);
int hbit = AllNumber[AllNumber.Count - 1].FirstIndexOfOne();
int position = 0;
AllCount.Add(1);
AllAccumlation.Add(0);
for (int i = 1; i < AllNumber.Count; i++)
{
if (Compare(AllNumber[i], AllNumber[position], hbit) != 0)
{
position++;
AllNumber[position] = AllNumber[i];
AllAccumlation.Add(i);
AllCount.Add(1);
}
else
AllCount[position]++;
}
AllAccumlation.Add(AllNumber.Count);
AllNumber.RemoveRange(position + 1, AllNumber.Count - position - 1);
ulong sum = calculateSame(AllNumber.Count - 1, 0, hbit);
Console.WriteLine(sum << 1);
}
//计算相同的段
static ulong calculateSame(int start, int end, int depth)
{
if (start == end || PowerOf10Log[depth] == 0)
return 0;
int hBit = AllNumber[start].IndexOfOne(depth);
int power = PowerOf10Log[hBit];
if (power == 0)
return 0;
if (start == end + 1)
return (ulong)(LogXor(AllNumber[start], AllNumber[end], hBit) * AllCount[start] * AllCount[end]);
int mid = -1;
for (int i = start; i >= end; i--)
{
if (!AllNumber[i].BitNum(hBit))
{
mid = i;
break;
}
}
if (mid == -1)
return calculateSame(start, end, hBit - 1);
ulong sumUpper = calculateSame(start, mid + 1, hBit - 1);
ulong sumLower = calculateSame(mid, end, hBit - 1);
ulong sum = sumUpper + sumLower;
if (power < 0)
sum += (ulong)((AllAccumlation[start + 1] - AllAccumlation[mid + 1]) * (AllAccumlation[mid + 1] - AllAccumlation[end]) * power * -1);
else
sum += calculateDiff(start, mid + 1, mid, end, hBit, hBit - 1);
return sum;
}
//计算不同的两个段
static ulong calculateDiff(int startUpper, int endUpper, int startLower, int endLower, int hBit, int depth)
{
if (startUpper < endUpper || startLower < endLower)
return 0;
if (startUpper == endUpper && startLower == endLower)
return (ulong)(LogXor(AllNumber[startUpper], AllNumber[startLower], hBit) * AllCount[startUpper] * AllCount[startLower]);
int power = PowerOf10Log[hBit];
ulong sum = 0;
int midUpper = endUpper - 1;
int midLower = endLower - 1;
bool bit10 = PowerOf10[power].BitNum(depth);
for (int i = startUpper; i >= endUpper; i--)
{
if (!AllNumber[i].BitNum(depth))
{
midUpper = i;
break;
}
}
for (int i = startLower; i >= endLower; i--)
{
if (!AllNumber[i].BitNum(depth))
{
midLower = i;
break;
}
}
if (bit10)
{
//Xor为0的都=power-1
sum += (ulong)((AllAccumlation[startUpper + 1] - AllAccumlation[midUpper + 1]) * (AllAccumlation[startLower + 1] - AllAccumlation[midLower + 1]) * (power - 1));
sum += (ulong)((AllAccumlation[midUpper + 1] - AllAccumlation[endUpper]) * (AllAccumlation[midLower + 1] - AllAccumlation[endLower]) * (power - 1));
sum += calculateDiff(startUpper, midUpper + 1, midLower, endLower, hBit, depth - 1);
sum += calculateDiff(midUpper, endUpper, startLower, midLower + 1, hBit, depth - 1);
}
else
{
//Xor为1的都=power
sum += (ulong)((AllAccumlation[startUpper + 1] - AllAccumlation[midUpper + 1]) * (AllAccumlation[midLower + 1] - AllAccumlation[endLower]) * power);
sum += (ulong)((AllAccumlation[midUpper + 1] - AllAccumlation[endUpper]) * (AllAccumlation[startLower + 1] - AllAccumlation[midLower + 1]) * power);
sum += calculateDiff(startUpper, midUpper + 1, startLower, midLower + 1, hBit, depth - 1);
sum += calculateDiff(midUpper, endUpper, midLower, endLower, hBit, depth - 1);
}
return sum;
}
static void preOperation()
{
PowerOf10 = new List<BigInt>();
PowerOf10Log = new int[128];
BigInt bInt = new BigInt(1);
int hbit;
while (bInt != null)
{
hbit = bInt.FirstIndexOfOne();
PowerOf10Log[hbit] = PowerOf10.Count;
PowerOf10.Add(bInt);
bInt *= 10;
}
int current = 0;
for (int i = 0; i < PowerOf10Log.Length; i++)
{
if (PowerOf10Log[i] == 0)
PowerOf10Log[i] = current * -1;
else
current = PowerOf10Log[i];
}
}
static int LogXor(BigInt A, BigInt B, int hbit)
{
int startbit = hbit >> 5;
int current = startbit + 1;
uint firstbit = 0;
while (firstbit == 0)
{
current--;
firstbit = A.Items[current] ^ B.Items[current];
}
if (startbit == current)
startbit = hbit - (current << 5);
else
startbit = 31;
for (int i = startbit; i > 0; i--)
{
if (((firstbit >> i) & 1) == 1)
{
startbit = i;
break;
}
}
hbit = (current << 5) + startbit;
int power = PowerOf10Log[hbit];
if (power == 0)
return 0;
if (power < 0)
return power * -1;
if (firstbit > PowerOf10[power].Items[current])
return power;
if (firstbit < PowerOf10[power].Items[current])
return power - 1;
for (int i = current - 1; i >= 0; i--)
{
firstbit = A.Items[i] ^ B.Items[i];
if (PowerOf10[power].Items[i] > firstbit)
return power - 1;
else if (PowerOf10[power].Items[i] < firstbit)
return power;
}
return power;
}
static int Compare(BigInt x, BigInt y, int bit)
{
for (int i = bit >> 5; i >= 0; i--)
{
if (x.Items[i] > y.Items[i])
return 1;
else if (x.Items[i] < y.Items[i])
return -1;
}
return 0;
}
}

复制代码

wayne · 发表于 2009-3-20 09:07:25

( ⊙o⊙ )，都四更了，还在工作。。。

liangbch · 发表于 2009-3-20 09:31:59

楼主应该注意休息，经常熬夜对身体伤害很大。我现在尽量在12点前睡觉。

无心人 · 发表于 2009-3-20 10:22:33

宝宝也老熬夜啊？

litaoye · 发表于 2009-3-20 23:29:14

过了这段时间可能就好了,谢谢提醒!呵呵!

liangbch · 发表于 2009-3-22 14:59:59

原帖由 liangbch 于 2009-3-19 10:24 发表我从0.342优化到0.171,如果优化后速度再提高一倍，就是第一名了。按照楼主的思路，将XOR和log10这2步操作合在一起，用汇编语言写成一个函数，速度应该可以更快。等有时间我将尝试一下。

我现在最好成绩0.078,打破了shshsh_0510的0.093的记录。 1. 先完全汇编语言编写xorLog10,速度提高到0.14秒 2. 然后在计算之前，将至多5000个UINT128 进行了排序，速度不降反增，速度竟提高到0.109. 3.将xorLog10进行调整，减少了指令条数。测试结果为0.093或者0.078

litaoye · 发表于 2009-3-23 01:07:27

呵呵,这个成绩c#是无论如何也不可能打破的了!确实很棒! 最近发现这个时间误差有时还是挺大的!

无心人 · 发表于 2009-3-23 08:33:59

宝宝的代码我看看 logxor如果用xmm最合适了 log10我的还存在一个跳转无法消不知你的如何？ [hide=1000000000000000000000000000000000] 给我elflyao@foxmail.com [/hide]

无心人 · 发表于 2009-3-23 08:42:32

我想C++如果深度汇编优化，打入前三问题不大

无心人 · 发表于 2009-3-23 09:03:14

typedef struct
{
unsigned int d[4];
} UINT128;
unsigned int CompTable[128][2] = {0,0, 0,0, 0,0, 0,1, 1,0, 1,0, 1,1, 2,0, 2,0, 2,1, 3,0, 3,0, 3,0, 3,1,
4,0,4,0,4,1,5,0,5,0,5,1,6,0,6,0,6,0,6,1,7,0,7,0,7,1,8,0,8,0,
8,1,9,0,9,0,9,0,9,1,10,0,10,0,10,1,11,0,11,0,11,1,12,0,12,
0,12,0,12,1,13,0,13,0,13,1,14,0,14,0,14,1,15,0,15,0,15,0,15,
1,16,0,16,0,16,1,17,0,17,0,17,1,18,0,18,0,18,0,18,1,19,0,19,
0,19,1,20,0,20,0,20,1,21,0,21,0,21,0,21,1,22,0,22,0,22,1,23,
0,23,0,23,1,24,0,24,0,24,0,24,1,25,0,25,0,25,1,26,0,26,0,26,
1,27,0,27,0,27,0,27,1,28,0,28,0,28,1,29,0,29,0,29,1,30,0,30,
0,30,1,31,0,31,0,31,0,31,1,32,0,32,0,32,1,33,0,33,0,33,1,34,
0,34,0,34,0,34,1,35,0,35,0,35,1,36,0,36,0,36,1,37,0,37,0,37,0,37,1,38,0};
UINT128 pow10[40] = {1,0,0,0,
10,0,0,0,
100,0,0,0,
1000,0,0,0,
10000,0,0,0,
100000,0,0,0,
1000000,0,0,0,
10000000,0,0,0,
100000000,0,0,0,
1000000000,0,0,0,
1410065408,2,0,0,
1215752192,23,0,0,
3567587328,232,0,0,
1316134912,2328,0,0,
276447232,23283,0,0,
2764472320,232830,0,0,
1874919424,2328306,0,0,
1569325056,23283064,0,0,
2808348672,232830643,0,0,
2313682944,2328306436,0,0,
1661992960,1808227885,5,0,
3735027712,902409669,54,0,
2990538752,434162106,542,0,
4135583744,46653770,5421,0,
2701131776,466537709,54210,0,
1241513984,370409800,542101,0,
3825205248,3704098002,5421010,0,
3892314112,2681241660,54210108,0,
268435456,1042612833,542101086,0,
2684354560,1836193738,1126043566,1,
1073741824,1182068202,2670501072,12,
2147483648,3230747430,935206946,126,
0,2242703233,762134875,1262,
0,952195850,3326381459,12621,
0,932023908,3199043520,126217,
0,730304488,1925664130,1262177,
0,3008077584,2076772117,12621774,
0,16004768,3587851993,126217744,
0,160047680,1518781562,1262177448,
0, 0, 0, 0};
__declspec(naked)
int __fastcall log_10(UINT128 * n)
{
__asm
{
bsr edx, [ecx]
bsr eax, [ecx + 4]
add eax, 32
cmp [ecx + 4], 0
cmovne edx, eax
bsr eax, [ecx + 8]
add eax, 64
cmp [ecx + 8], 0
cmovne edx, eax
bsr eax, [ecx + 12]
add eax, 96
cmp [ecx + 12], 0
cmovne edx, eax
mov eax, dword ptr CompTable[8 * edx]
cmp dword ptr CompTable[8 * edx + 4], 0
je ex
add eax, 1
mov edx, eax
shl edx, 4
push ebx
mov ebx, [ecx]
sub ebx, pow10[edx]
mov ebx, [ecx + 4]
sbb ebx, pow10[edx + 4]
mov ebx, [ecx + 8]
sbb ebx, pow10[edx + 8]
mov ebx, [ecx + 12]
sbb ebx, pow10[edx + 12]
sbb ebx, ebx
add eax, ebx
pop ebx
ex:
ret
}
}

复制代码

log10的汇编版本

账号		自动登录	找回密码
密码			欢迎注册

[求助] 关于一个运算优化的问题

回复 151# litaoye 的帖子

回复 153# liangbch 的帖子

评分

回复 156# liangbch 的帖子