Daviddr's Blog: 11月 2009

2009年11月30日星期一

水仙花數

　
Narcissus Number 指的是「各個位數的立方和等於原數」的數。
列出 100~1000 間的那斯索斯~
應該還有更聰明的羅列方法..

int main ()
{
    #define C n[x] = x*x*x; x++
    int a=1, b=0, c=0, x=0; 
    int n[10]; {C,C,C,C,C,C,C,C,C,C;} 
     
    for (x=99; ++x < 1000; 
        ++c > 9 && (c=0, ++b > 9) && (b=0, ++a)) 
        if (x == n[a]+n[b]+n[c]) cout <<' '<< x;
}

2009年11月26日星期四

stackoverflow Part 2

　
beautiful code 系列：

首先是廣為人知的 Carmack Q3:

float InvSqrt (float x)
{
    float xhalf = 0.5f*x;
    int i = *(int*)&x;
    i = 0x5f3759df - (i>>1);
    x = *(float*)&i;
    x = x*(1.5f - xhalf*x*x);
    return x;
}

Haskell 著名示例：

qsort [] = []
qsort (x:xs) = qsort (filter (< x) xs) ++ [x] ++ qsort (filter (>= x) xs)

談最佳化時常提及的 Duff's device，雖然只是 loop unrolling~

void strcpy (to, from, count)
register char *to, *from;
register int count;
{
    int n = (count + 7) / 8;
    switch (count % 8) {
      case 0: do { *to = *from++;
      case 7:      *to = *from++;
      case 6:      *to = *from++;
      case 5:      *to = *from++;
      case 4:      *to = *from++;
      case 3:      *to = *from++;
      case 2:      *to = *from++;
      case 1:      *to = *from++;
    } while (--n > 0);
  }
}

Evil C 裡的另一例：

switch (c&3) while ((c-=4)>=0) {
    foo(); case 3:
    foo(); case 2:
    foo(); case 1:
    foo(); case 0:
}

theycallhimtom post。給定 adjacency matrix，計算各個最短路徑：

for (int i = 0; i < adj.length; i++)
    for (int j = 0; j < adj.length; j++)
        for (int k = 0; k < adj.length; k++)
            adj[j][k] = min (adj[j][k], adj[j][i] + adj[i][k]);

位元倒置：

unsigned int reverse (register unsigned int x)
{
    x = ((x & 0xaaaaaaaa) >> 1) | ((x & 0x55555555) << 1);
    x = ((x & 0xcccccccc) >> 2) | ((x & 0x33333333) << 2);
    x = ((x & 0xf0f0f0f0) >> 4) | ((x & 0x0f0f0f0f) << 4);
    x = ((x & 0xff00ff00) >> 8) | ((x & 0x00ff00ff) << 8);
    return (x >> 16) | (x << 16);
}

ob-C Mandelbrot:

main(){float A,B,P,Q,X,Y,d;int i,D=80,n=3120;for(X=-2.,Y=-1.5,d=6./D;B=2*A
*B+Y,A=P-Q+X,n;((P=A*A)+(Q=B*B)>4||++i>D)&&putchar(*((n--%D?X+=d/2,i<D?i%11
:11:(X=-2.0,Y+=d,12))+"Mandelbrot! \n"))&&(A=B=P=Q=i=0));}

Another one

#include <unistd.h>;
float o=0.075,h=1.5,T,r,O,l,I;int _,L=80,s=3200;main(){for(;s%L||
(h-=o,T= -2),s;4 -(r=O*O)<(l=I*I)|++ _==L&&write(1,(--s%L?_<L?--_
%6:6:7)+"World! \n",1)&&(O=I=l=_=r=0,T+=o /2))O=I*2*O+h,I=l+T-r;}

Ken Perlin 更短的..

main(k){float i,j,r,x,y=-16;while(puts(""),y++<15)for(x
=0;x++<84;putchar(" .:-;!/>)|&IH%*#"[k&15]))for(i=k=r=0;
j=r*r-i*i-2+x/25,i=2*r*i+y/10,j*j+i*i<11&&k++<111;r=j);}

2009年11月25日星期三

stackoverflow.com

　
底下是改寫自 stackoverflow 的各種有趣寫法，
原始創意乃源出於不同作者：

Zeroing structs without memset:

FStruct s = {0};
或
FStruct s = {};

void search (char* query) {
    http://www.google.com/search?q=query
    return;
}

然後... Peter Mortensen 這樣寫到：

Fred* f = new(ram) Fred(); 
http://www.parashift.com/c++-faq-lite/dtors.html#faq-11.10
f->~Fred();

同樣是 Peter Mortensen，身份變換 :-)

class A {};
struct B {
    A a;
    operator A&() { return a; }
};
void func(A a) { }

int main()
{
    A a, c;
    B b;
    a = c;
    func (b);     //yeah baby
    a = b;        //gotta love this
}

Daniel Martin，Perl 運算式解析

sub e{($_)=@_;$n='( *-?[.\d]++ *)';
s:\($n\)|$n(.)$n:($1,0,$2*$4,$2+$4,0,$2-$4)[7&ord$3]//$2/$4:e?e($_):$_}

gnibbler，Golfscript 文字模式 Sierpinski’s Triangle:

' /\ /__\ '4/{).+: ;.{ \ ++}%\{.+}%+~ ]}@~(*n*

惡作劇用：

<td align="right" style="text-align: left">

也是惡作劇用，fork bomb：

:(){ :|:& };:

放在 source code 最上方.. 然後執行它:

//&>/dev/null;x="${0%.*}";[ ! "$x" -ot "$0" ]||(rm -f "$x";cc -o "$x" "$0")&&"$x" $*;exit

In Quake 2 src:

double normals[][] = {
    #include "normals.txt"
};

Multi-character constants:

enum state {
    stopped = 'STOP',
    running = 'RUN!',
    waiting = 'WAIT',
};

三元運算：

(a? b: c) = (b<c ? b: c);

Patrick post，同樣是三元運算 (q = b ? x : y)

bool b;
int  x, y;
int  q = -!!b&x|-!b&y;

算 PI:

double fldpi ()
{
    double pi;
    asm ("fldpi" : "=t" (pi));
    return pi;
}

Static linked lists：

struct llist { int a; struct llist* next;};
#define cons(x,y) (struct llist[]) {{x,y}}
struct llist *list = cons(1, cons(2, cons(3, cons(4, NULL))));

整數求和:

#define sum(...) \
    _sum(sizeof((int []){ __VA_ARGS__ })\
         / sizeof(int), (int []){ __VA_ARGS__ })

int _sum (size_t count, int values[])
{
    int s = 0;
    while (count--) s += values[count];
    return s;
}
int main () {printf("%i", sum (1,2,3));}

上例的變形：

void f (int len, int *n, char c) {...}
#define SIZEOF(a) sizeof(a)/sizeof*(a) 
#define NOPAREN(...) __VA_ARGS__ 
#define F1(arr,c)  {int v[]={NOPAREN arr}; f(SIZEOF(v),v,c);} 
#define F2(c,...)  {int v[]=__VA_ARGS__;   f(SIZEOF(v),v,c);}

再一種應用：

#include <algorithm>;
#define MAX(x, ...) ({\
        typeof(x) ra[] = {(x), __VA_ARGS__}; \
        *std::max_element (&ra[0], &ra[sizeof ra/sizeof 0[ra]]); \
    })
int main() {
    int i = 12;
    std::cout << MAX (i+1,1,3,6,i);
}

Johannes Schaub 寫的：

template <template From, template To>
union union_cast {
    From from;
    To   to;
    union_cast (From from): from(from) { }
    To getTo() const { return to; }
};

這是真的嗎 :-)

const MyClass& x = MyClass(); // temporary exists as long as x is in scope

Rube 累計遞增:

def foo(n)
  lambda { |i| n += i }
end

測驗題，不用空白來定義結構。By Bojan Resnik：

typedef/**/struct{const/**/char/**/c;unsigned/**/u;}nospace;

vividos 寫的：

int Func()
try
{
   // if success
   return 0;
}
catch(...)
{
   return -1;
}

goto 和 dtor 的關係：

struct A { ~A() {cout<<"dtor";} };
int main() {
    { A a; goto L; }
L:  cout<<"After L";
}

Evil C：

int logical_xor (int a, int b) {
    return !a != !b;
}

From Canadian Mind Products:

char *p;
switch (n)
{
  case 1:
    p = "one";
    if (0)
  case 2:
    p = "two";
    if (0)
  case 3:
    p = "three";
    printf ("%s", p);
    break;
}

自動初始化 a 中元素為 0（那麼，本來會做嗎？）:

class C
{
    int a[10];
    C(): a() {}
};

template bitfields，by Kaz Dragon:

template <size_t X, size_t Y>
struct bitfield
{
    char left  : X;
    char right : Y;
};

出自 Comptrol：

template <typename T> 
class Creator { 
    friend void appear() {  // a new function ::appear(), but it doesn't 
        …                   // exist until Creator is instantiated 
    } 
};
Creator<void> miracle;  // ::appear() is created at this point 
Creator<double> oops;   // ERROR: ::appear() is created a second time!

template <typename T> 
class Creator { 
    friend void feed(Creator<T>*){  // every T generates a different 
        …                           // function ::feed() 
    } 
}; 
Creator<void> one;     // generates ::feed(Creator<void>*) 
Creator<double> two;   // generates ::feed(Creator<double>*)

用於 non-tmpl 的類 boost::identity。
出自 Johannes Schaub：

template<typename T> 
struct id { typedef T type; };

// void (*f)(); // same
id<void()>::type *f;

// void (*f(void(*p)()))(int); // same
id<void(int)>::type *f(id<void()>::type *p);

// int (*p)[2] = new int[10][2]; // same
id<int[2]>::type *p = new int[10][2];

// void (C::*p)(int) = 0; // same
id<void(int)>::type C::*p = 0;

原載於 Sumant Tambe 的 Blog:

template<class T>   // (a) a base template
void f( T ) {
    std::cout << "f(T)\n";
}
template<>
void f<>(int*) {    // (b) an explicit specialization
    std::cout << "f(int *) specilization\n";
}
template<class T>   // (c) another, overloads (a)
void f( T* ) {
    std::cout << "f(T *)\n";
}
template<>
void f<>(int*) {    // (d) another identical explicit specialization
    std::cout << "f(int *) another specilization\n";
}

用複雜方法做簡單的事? 出自 Marcus Lindblom:

template<int> class foo;
template<> struct foo<0> {
    int* get() { return array; }
    int* array;  
};
template<int i> struct foo: public foo<i-1> {
    int* get() { return array + 1; }  
};

2009年11月11日星期三

Go

http://golang.org/doc/go_for_cpp_programmers.html
http://www.youtube.com/watch?v=rKnDgT73v8s

2009年11月8日星期日

資料統計圖

　
Windows 下有好幾種產生資料統計圖的方法：

(1) 用 Excel

#include <ole2.h>
.....
CLSID     clsid;
IDispatch pApp;
CoInitialize (0);
CLSIDFromProgID (L"Excel.Application", &clsid);
CoCreateInstance (clsid, 0, CLSCTX_LOCAL_SERVER, IID_IDispatch, (void**)&pApp);

(2) 用 wgunplot
將 data 存入 data.txt，繪圖命令存入 script.txt，
再調用：system ("wgunplot script.txt");

(3) 用 plplot 直接畫
http://plplot.sourceforge.net/examples.php

(4) R
http://www.r-project.org/
專用於 Statistical Computing。

這幾種方案中，(2) 的彈性、易用性最高。
若需要更多的統計計算，可採用 R。

2009年11月1日星期日

交換變數值

以下是一些交換變數值的方法。

C++ 中交換 2 變數常使用：std::swap

若要自己設計 swap 的話，
一種簡單、正確、且速度最快的寫法是：

template <typename T>
    inline void swap (T& a, T& b)
    {
        T t(a); a=b; b=t;
    }

C 語言中，若想支援各種類型的變數交換，
一個 (不是很好的) 方法是：


#define SWAP(T,a,b) {T t=a; a=b; b=t;}

驗證：

int main ()
{   
    float a=-88, b=57;   
    SWAP (float, a, b);
    printf ("\n a=%f  b=%f", a, b);

    char *c="alcohol", *d="water";   
    SWAP (char*, c, d);
    printf ("\n c=%s  d=%s", c, d);
}

除此之外，就只能老實將型別寫出來：


    void swap_int (int* a, int* b)  
    {
        int c = *a; *a = *b; *b = c;
    }

不過上面的寫法在 compiler 未最佳化前，
其實多浪費了兩個堆疊空間 (用來傳參數)。

若僅針對 4Bytes 型別做交換 (如char、float 與 int)，
又想寫個「通用」的 swap，一種有趣的寫法是：

void swap2 (void* a, void* b)
    {
        int    t = *(int*)a;
        *(int*)a = *(int*)b;
        *(int*)b = t;
    }

他的未最佳化組語有點長：


    int t = *(int*)a;      mov   eax,dword ptr [a]
                           mov   ecx,dword ptr [eax]
                           mov   dword ptr [t],ecx
    *(int*)a = *(int*)b;   mov   eax,dword ptr [a]
                           mov   ecx,dword ptr [ b ]
                           mov   edx,dword ptr [ecx]
                           mov   dword ptr [eax],edx
    *(int*)b = t;          mov   eax,dword ptr [ b ]
                           mov   ecx,dword ptr [t]
                           mov   dword ptr [eax],ecx

驗證：

int main ()
{   
    float a=-88.5, b=57.12;
    swap2 (&a, &b);
    printf ("\n a=%f  b=%f", a, b);

    char c='5', d='9';
    swap2 (&c, &d);
    printf ("\n c=%c  d=%c", c, d);
}

不調用函式的情況下，針對 1~4Bytes 型別，
非 MMX 之 x86 組語最快的寫法或許是：


    __asm {                               
        mov  eax, a               
        mov  ebx, b               
        mov  b  , eax               
        mov  a  , ebx               
    }

比較短，但慢些的寫法：
(因為 xchg 指令週期一般比 mov 慢好幾倍)


    _asm {
        mov  eax, a
        xchg eax, b
        mov  a  , eax
    }

同樣短，但更慢：


    _asm {              
        xchg  a, eax     
        xchg  b, eax     
        xchg  a, eax     
    }

比較慢，又額外浪費堆疊空間的寫法是：


    _asm{
        push  a
        push  b
        pop   a
        pop   b
    }

或是浮點數專用的：


    _asm{
        fld   a
        fld   b
        fstp  a
        fstp  b
    }

是否存在不使用額外變數，而能將變數值正確交換的方法呢？
理論上是...... 沒有。但存在一些限制性的做法：


    a ^= b ^= a ^= b;            (1)
    a -= b = (a+=b) - b;         (2)
    a -= b += a -= b = -b;       (3)
    a = a+b, b = a-b, a = a-b;   (4)
    a *= b, b = a/b, a /= b;     (5)

(2)(3)(4) 其實是相同的邏輯。
當 a 和 b 指向相同位址時，以上均得出 0。
(2)~(5) 都有可能發生 over/under flow，
此外，(5) 還有除零導致程式掛掉的問題。

交換「整個」陣列時，一般會先用指標代表原先陣列，
因此，只需交換「代表它」的指標，就能達成陣列的交換。
即使陣列長度不同，也能成功「交換」。

例如：

//原始陣列
int A[] =  { ... };     
int B[] =  { ... };

//用指標代表某個陣列
int * a = A;
int * b = B;

//交換所代表的陣列
int *t = a; a = b; b = t;

於是在 C 中可以這樣設計：

void swap (void** a, void** b)
{
    void* t = *a; *a = *b; *b = t;
}

void show (void* a, int n)
{
    int i, *p = (int*) a;          //此處強制定成 int
    for (i=0; i<n; ++i)
        printf (" %d", p[i]);
    puts ("");
}

int main()
{
    const int NUM = 8;
    int i; 
    int A[NUM] = {1,2,3,4,5,6,7,8};
    int B[NUM] = {11,12,13,14,15,16,17,18};

    void* a = A;
    void* b = B;

    show (a, NUM);
    show (b, NUM);

    swap (&a, &b);

    show (a, NUM);
    show (b, NUM);
}

更一般化地，我們可用 Widget 模式包覆在非基本類型上，
於是交換 2 個 Widget 時，只要交換其中的物件指標，便可達成物件交換。

把單個或多個元素，從某個起始位址「整批」移到另個位址，
有 2 種情況：
陣列間元素的「部分」抽換，使用 memcpy。
陣列內元素的「自身抽換」，使用 memmove。

自身抽換之元素位於頭尾時，便形成自旋：

void show (int* a, int n)
{
    for (int i=0; i<n; ++i)
        printf (" %d", a[i]);
    puts ("");
}

void left_rotate (int* a, int n)
{
    int t = a[0];
    memmove (a, a+1, (n-1) * sizeof(int));
    a[n-1] = t;
}

int main()
{
    const int NUM = 8;
    int i=3, a[NUM] = {1,2,3,4,5,6,7,8};
    
    while (i--)
        left_rotate (a, NUM), show (a, NUM);
}

時間檢測


這是個 nano-second 精度的計時器，所做的僅是計算 MSR 差值。
在 multi-core processors 下，
各 CPU 的 TSC 同步率若不高，計時結果也會有誤差，
故建構 TimeProfile 時，便設法讓行程在單一 CPU 上跑。

設計上很簡便，但存在一些缺點：

1. 當他前幾次執行時，算出來的時間間隔會比較長。
    因為 CPU 要花時間將算法的相關指令與資料 load 到 L2 甚至 L1 裡，
    並調整諸如分支預測等機制的最佳模式。

2. Process 會因 context switch 被切出去，但 MSR 仍持續累加，
    造成計時誤差。為避免此情形，可在檢測算法前，
    將 process 與其中的 main thread 之 priority 提高，
    待算完後再行回復：

    DWORD oldProcess = GetPriorityClass (GetCurrentProcess());
    DWORD oldThread  = GetThreadPriority (GetCurrentThread());

    SetPriorityClass (GetCurrentProcess(), REALTIME_PRIORITY_CLASS);
    SetThreadPriority (GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL);
    ...........[Algorithm]...........
    SetThreadPriority (GetCurrentThread(), oldThread);
    SetPriorityClass (GetCurrentProcess(), oldProcess);

    但這步驟可能會讓其他 thread 無法順暢執行，
    為保持 TimeProfile 的「輕鬆」簡便，便不加上這層考量。

3. 現今許多 CPU 為了省電節能，當處於閒置狀態會降低主頻，
    如： Intel 的 EIST、AMD 的 C&Q，
    故 TimeProfile 中額外定義了 set_freq，
    以便在「需要」時重新讀取 CPU 頻率。

4. 需注意：真正的計時類別尚有許多細節待考慮，
    本程式僅是個以簡便為主的粗糙設計。

#include <stdio.h>
#include <windows.h>

class TimeProfile
{
    unsigned __int64 Hz;
    unsigned __int64 cycle()     //讀取 64-bit 的 
    {                            //Model Specific Register (MSR)
        _asm
        {
            xor   eax, eax
            _emit 0x0F           //使用 cpuid 讓 CPU 不採無序執行指令
            _emit 0xA2           //讓接下來的指令照流水線運行
            _emit 0x0F           //注意: 586 後的 IA32 才支援 RDTSC
            _emit 0x31           //結果置於 EDX:EAX
        }
    }
  public:
    void set_freq()
    {
        LARGE_INTEGER freq;
        QueryPerformanceFrequency (&freq);
        Hz = freq.QuadPart;
    }
    TimeProfile()
    {
        set_freq();
        //若 multi-core 同步率高, 可不用加下面那行
        SetThreadAffinityMask (GetCurrentThread(), 1);
    }
    double time;
    void start() {time = (double) cycle();}
    void end()   {time = double (cycle()-time) /Hz;}
};

測試碼：

double cal_pi (int i = 50)
{
   double pi = 2;
   for (; i>0; i--) 
       pi = pi * double(i)/(2*i+1) + 2;
   return pi;
}

int main (/* daviddr 081211 */)
{
    TimeProfile tp;   
    double pi;

    for (int i=1; i<52; i+=5)
    {
        tp.start();
        pi = cal_pi (i);
        tp.end();
        printf ("\n[%2d] PI = %.15f   Time: %.9f", i, pi, tp.time);
    }   
    return system ("pause");
}

從結果可以觀察到：第一次執行時，
因為 CPU 要做些「準備工作」，算出的時間便較長。


[ 1] PI = 2.666666666666667   Time: 0.000004494
[ 6] PI = 3.132156732156732   Time: 0.000001445
[11] PI = 3.141358472520136   Time: 0.000001507
[16] PI = 3.141586396037061   Time: 0.000001674
[21] PI = 3.141592479958224   Time: 0.000001756
[26] PI = 3.141592648659952   Time: 0.000001881
[31] PI = 3.141592653447636   Time: 0.000002006
[36] PI = 3.141592653585648   Time: 0.000002130
[41] PI = 3.141592653589671   Time: 0.000002255
[46] PI = 3.141592653589790   Time: 0.000002379
[51] PI = 3.141592653589793   Time: 0.000002504

訂閱：文章 (Atom)

Daviddr's Blog

2009年11月30日星期一

水仙花數

文字特效

2009年11月26日星期四

stackoverflow Part 2

2009年11月25日星期三

stackoverflow.com

2009年11月11日星期三

Go

2009年11月8日星期日

資料統計圖

2009年11月1日星期日

交換變數值

時間檢測

總網頁瀏覽量

內容引用事項

網誌存檔

關於我自己

追蹤者

熱門文章

2009年11月30日 星期一

2009年11月26日 星期四

2009年11月25日 星期三

2009年11月11日 星期三

2009年11月8日 星期日

2009年11月1日 星期日

總網頁瀏覽量

內容引用事項

網誌存檔

關於我自己

追蹤者

熱門文章

2009年11月30日星期一

2009年11月26日星期四

2009年11月25日星期三

2009年11月11日星期三

2009年11月8日星期日

2009年11月1日星期日