符号位扩展问题总结

由如下的一段代码作为本篇文章的引子，也可以通过它看出在写代码时，注意数值转换的重要性。这段代码是将各种类型的数值与unsigned int -1值对比，请注意对比后的结果。

     
             1  
             #include <stdio.h>
         
             2  
              
             3  
             #define comp_printf(FIRST, SECOND) printf("LINE%02d:  "#FIRST" %s "#SECOND"\n", __LINE__, FIRST == SECOND?"==":"!=")
         
             4  
              
             5  
             int main()
         
             6  
             {
         
             7  
                 const unsigned int ui_pos = static_cast<unsigned int>(-1);
         
             8  
              
             9  
                 char ci_pos_1 = ui_pos;
         
             10  
                 comp_printf(ui_pos, ci_pos_1);
         
             11  
                 char ci_pos_2 = static_cast<char>(-1);
         
             12  
                 comp_printf(ui_pos, ci_pos_2);
         
             13  
              
             14  
                 unsigned char uc_pos_1 = ui_pos;
         
             15  
                 comp_printf(ui_pos, uc_pos_1);
         
             16  
                 unsigned char uc_pos_2 = static_cast<unsigned char>(-1);
         
             17  
                 comp_printf(ui_pos, uc_pos_2);
         
             18  
              
             19  
                 short si_pos_1 = ui_pos;
         
             20  
                 comp_printf(ui_pos, si_pos_1);
         
             21  
                 short si_pos_2 = static_cast<short>(-1);
         
             22  
                 comp_printf(ui_pos, si_pos_2);
         
             23  
              
             24  
                 unsigned short us_pos_1 = ui_pos;
         
             25  
                 comp_printf(ui_pos, us_pos_1);
         
             26  
                 unsigned short us_pos_2 = static_cast<unsigned short>(-1);
         
             27  
                 comp_printf(ui_pos, us_pos_2);
         
             28  
                 
             29  
                 unsigned long ul_pos_1 = ui_pos;
         
             30  
                 comp_printf(ui_pos, ul_pos_1);
         
             31  
                 unsigned long ul_pos_2 = static_cast<unsigned long>(-1);
         
             32  
                 comp_printf(ui_pos, ul_pos_2);
         
             33  
              
             34  
                 long long ll_pos_1 = ui_pos;
         
             35  
                 comp_printf(ui_pos, ll_pos_1);
         
             36  
                 long long ll_pos_2 = static_cast<long long>(-1);
         
             37  
                 comp_printf(ui_pos, ll_pos_2);
         
             38  
                     
             39  
                 unsigned long long ull_pos_1 = ui_pos;
         
             40  
                 comp_printf(ui_pos, ull_pos_1);
         
             41  
                 unsigned long long ull_pos_2 = static_cast<unsigned long long>(-1);
         
             42  
                 comp_printf(ui_pos, ull_pos_2);
         
             43  
              
             44  
                 return 0;
         
             45  
             }
         
             46

上述代码在GCC3.3.4环境编译执行结果如下：

LINE10: ui_pos == ci_pos_1
LINE12: ui_pos == ci_pos_2
LINE15: ui_pos != uc_pos_1
LINE17: ui_pos != uc_pos_2
LINE20: ui_pos == si_pos_1
LINE22: ui_pos == si_pos_2
LINE25: ui_pos != us_pos_1
LINE27: ui_pos != us_pos_2
LINE30: ui_pos == ul_pos_1
LINE32: ui_pos == ul_pos_2
LINE35: ui_pos == ll_pos_1
LINE37: ui_pos != ll_pos_2
LINE40: ui_pos == ull_pos_1
LINE42: ui_pos != ull_pos_2

由上述的结果我们得出了一个结论，不同类型数字的对比，-1和-1并不都是相等的。究其原因就是对比数字需要将二者转换到同一种类型，而在转换过程中可能存在符号位扩展，由此导致值不尽相同的现象。

LINE25、27在对比前us_pos_1和us_pos_2的值都为0xFFFF，而ui_pos的值为0xFFFFFFFF。当unsigned short和unsigned int对比时，前者向后者的类型转换进行0扩展，也就是0x0000FFFF和0xFFFFFFFF对比，很显然二者是不相同的。

LINE20、22与LINE25、27的不同之处仅在于它们是有符号数，可结果为什么有这么大的差别？在对比前si_pos_1和si_pos_2的值都为0xFFFF。当short和unsigned int对比时，前者向后者的类型转换进行1扩展（即符号位扩展），也就是0xFFFFFFFF和0xFFFFFFFF对比，所以对比的结果为真。

LINE35结果为真的原因是：在赋值时unsigned int 向 long long类型转换，进行0扩展，相当于ll_pos_1=0x00000000FFFFFFFF。在对比时unsigned int再次向long long类型转换，相当于0x00000000FFFFFFFF（ui_pos转换后的结果）与0x00000000FFFFFFFF（就是ll_pos_1）对比，所以二者相同。

LINE37结果为假的原因是：在赋值时-1向long long类型转换，-1作为有符号数进行1扩展，相当于ll_pos_2=0xFFFFFFFFFFFFFFFF。在对比时unsigned int向long long类型转换，进行0扩展，相当于0x00000000FFFFFFFF（ui_pos转换后的结果）与0xFFFFFFFFFFFFFFFF（就是ll_pos_2）对比，显然二者不同。

实际错误例子

由于存在类型转换，对比时如果使用了不恰当的数字类型，极有可能会与自己预想的结果不一样。比如下面的这种情况。

     
             1  
             std::string s_string("teststring");
         
             2  
             unsigned short us_pos = s_string.find("google");
         
             3  
             if (us_pos == std::string::npos)
         
             4  
                 printf("'google' is not found  in 'teststring'\n");
         
             5  
             else
         
             6  
                 printf("'google' is found in 'teststring'\n");

我们先分析下各值的情况，一般情况下std::string::npos = (size_t)-1，而size_t在32位平台上一般为unsigned int类型。所以find返回的值为(unsigned int)-1，即0xFFFFFFFF；赋给unsigned short类型的us_pos时只保留低16位，us_pos = 0xFFFF。在对比时us_pos向unsigned int类型转换，进行0扩展，相当于0x0000FFFF（us_pos转换后的值）与0xFFFFFFFF（std::string::npos的值）对比。这时打印的信息是：'google' is found in 'teststring'。这个结果显然是不对的。正确的写法应该是：std::string::size_type pos = s_string.find("google");

转换规则总结

下面我们总结下不同类型数字间转换的规则，以后的编程时请注意数字转换带来的影响。

原类型		转换后类型	转换规则
char	-->	unsigned char	最高位变成数据位，不再表示符号位
char	-->	short	符号位扩展
char	-->	unsigned short	符号位扩展（先符号位扩展到short；然后由short转成unsigned short）
char	-->	long	符号位扩展
char	-->	unsigned long	符号位扩展（先符号位扩展到long；然后由long转成unsigned long）
char	-->	float	先符号位扩展到long；然后由long转成float
char	-->	double	先符号位扩展到long；然后由long转成double
char	-->	long double	先符号位扩展到long；然后由long转成long double
short	-->	char	保留低字节（short si = 0x1234; char ci = si; ci实际为0x34）
short	-->	unsigned char	保留低字节
short	-->	unsigned short	最高位变成数据位，不再表示符号位
short	-->	long	符号位扩展
short	-->	unsigned long	符号位扩展（先符号位扩展到long；然后由long转成unsigned long）
short	-->	long long	符号位扩展
short	-->	unsigned long long	符号位扩展（先符号位扩展到long long；然后再转成unsigned long long）
short	-->	float	先符号位扩展到long；然后由long转成float
short	-->	double	先符号位扩展到long；然后由long转成double
short	-->	long double	先符号位扩展到long；然后由long转成long double
int	-->	long long	符号位扩展
int	-->	unsigned long long	符号位扩展（先符号位扩展到long long；然后再转成unsigned long long）
long	-->	char	保留低字节
long	-->	unsigned char	保留低字节
long	-->	short	保留低字节
long	-->	unsigned short	保留低字节
long	-->	unsigned long	最高位变成数据位，不再表示符号位
long	-->	long long	符号位扩展
long	-->	unsigned long long	符号位扩展（先符号位扩展到long long；然后再转成unsigned long long）
long	-->	float	使用单精度浮点数表示，可能丢失精度。
long	-->	double	使用双精度浮点数表示，可能丢失精度。
long	-->	long double	使用双精度浮点数表示，可能丢失精度。

unsigned char	-->	char	最高位作为符号位
unsigned char	-->	short	0扩展
unsigned char	-->	unsigned short	0扩展
unsigned char	-->	int	0扩展
unsigned char	-->	unsigned int	0扩展
unsigned char	-->	long	0扩展
unsigned char	-->	unsigned long	0扩展
unsigned char	-->	float	先转换到long然后再转换到float
unsigned char	-->	double	先转换到long然后再转换到double
unsigned short	-->	char	保留低字节
unsigned short	-->	unsigned char	保留低字节
unsigned short	-->	short	最高位作为符号位
unsigned short	-->	long	0扩展
unsigned short	-->	unsigned long	0扩展
unsigned short	-->	float	先转换到long然后再转换到float
unsigned short	-->	double	先转换到long然后再转换到double
unsigned short	-->	long double	先转换到long然后再转换到long double
unsigned long	-->	char	保留低字节
unsigned long	-->	unsigned char	保留低字节
unsigned long	-->	short	保留低字节
unsigned long	-->	unsigned short	保留低字节
unsigned long	-->	long	最高位作为符号位
unsigned long	-->	float	转换到long再转换到float
unsigned long	-->	double	直接转换到double
unsigned long	-->	long double	转换到long再转换到long double

参考文章：
1、http://www.leskysoft.com/bbs/dispbbs.asp?boardid=5&id=1&page=1&star=1

符号位扩展问题总结

1	#include <stdio.h>
2
3	#define comp_printf(FIRST, SECOND) printf("LINE%02d: "#FIRST" %s "#SECOND"\n", __LINE__, FIRST == SECOND?"==":"!=")
4
5	int main()
6	{
7	const unsigned int ui_pos = static_cast<unsigned int>(-1);
8
9	char ci_pos_1 = ui_pos;
10	comp_printf(ui_pos, ci_pos_1);
11	char ci_pos_2 = static_cast<char>(-1);
12	comp_printf(ui_pos, ci_pos_2);
13
14	unsigned char uc_pos_1 = ui_pos;
15	comp_printf(ui_pos, uc_pos_1);
16	unsigned char uc_pos_2 = static_cast<unsigned char>(-1);
17	comp_printf(ui_pos, uc_pos_2);
18
19	short si_pos_1 = ui_pos;
20	comp_printf(ui_pos, si_pos_1);
21	short si_pos_2 = static_cast<short>(-1);
22	comp_printf(ui_pos, si_pos_2);
23
24	unsigned short us_pos_1 = ui_pos;
25	comp_printf(ui_pos, us_pos_1);
26	unsigned short us_pos_2 = static_cast<unsigned short>(-1);
27	comp_printf(ui_pos, us_pos_2);
28
29	unsigned long ul_pos_1 = ui_pos;
30	comp_printf(ui_pos, ul_pos_1);
31	unsigned long ul_pos_2 = static_cast<unsigned long>(-1);
32	comp_printf(ui_pos, ul_pos_2);
33
34	long long ll_pos_1 = ui_pos;
35	comp_printf(ui_pos, ll_pos_1);
36	long long ll_pos_2 = static_cast<long long>(-1);
37	comp_printf(ui_pos, ll_pos_2);
38
39	unsigned long long ull_pos_1 = ui_pos;
40	comp_printf(ui_pos, ull_pos_1);
41	unsigned long long ull_pos_2 = static_cast<unsigned long long>(-1);
42	comp_printf(ui_pos, ull_pos_2);
43
44	return 0;
45	}
46

1	std::string s_string("teststring");
2	unsigned short us_pos = s_string.find("google");
3	if (us_pos == std::string::npos)
4	printf("'google' is not found in 'teststring'\n");
5	else
6	printf("'google' is found in 'teststring'\n");