Форум: "Основная";
Текущий архив: 2006.11.19;
Скачать: [xml.tar.bz2];
ВнизUTF-8 > Win Найти похожие ветки
← →
markers © (2006-10-10 16:02) [0]Как определить Subj и конвертнуть в Windows-1251 Code Page?
Пасибо заранее!
← →
clickmaker © (2006-10-10 16:06) [1]function UnicodeToUtf8(Dest: PChar; MaxDestBytes: Cardinal; Source: PWideChar; SourceChars: Cardinal): Cardinal;
var
i, count: Cardinal;
c: Cardinal;
begin
Result := 0;
if Source = nil then Exit;
count := 0;
i := 0;
if Dest <> nil then
begin
while (i < SourceChars) and (count < MaxDestBytes) do
begin
c := Cardinal(Source[i]);
Inc(i);
if c <= $7F then
begin
Dest[count] := Char(c);
Inc(count);
end
else if c > $7FF then
begin
if count + 3 > MaxDestBytes then
break;
Dest[count] := Char($E0 or (c shr 12));
Dest[count + 1] := Char($80 or ((c shr 6) and $3F));
Dest[count + 2] := Char($80 or (c and $3F));
Inc(count, 3);
end
else // $7F < Source[i] <= $7FF
begin
if count + 2 > MaxDestBytes then
break;
Dest[count] := Char($C0 or (c shr 6));
Dest[count + 1] := Char($80 or (c and $3F));
Inc(count, 2);
end;
end;
if count >= MaxDestBytes then count := MaxDestBytes - 1;
Dest[count] := #0;
end
else
begin
while i < SourceChars do
begin
c := Integer(Source[i]);
Inc(i);
if c > $7F then
begin
if c > $7FF then
Inc(count);
Inc(count);
end;
Inc(count);
end;
end;
Result := count + 1; // convert zero based index to byte count
end;
function Utf8ToUnicode(Dest: PWideChar; MaxDestChars: Cardinal; Source: PChar; SourceBytes: Cardinal): Cardinal;
var
i, count: Cardinal;
c: Byte;
wc: Cardinal;
begin
if Source = nil then
begin
Result := 0;
Exit;
end;
Result := Cardinal(-1);
count := 0;
i := 0;
if Dest <> nil then
begin
while (i < SourceBytes) and (count < MaxDestChars) do
begin
wc := Cardinal(Source[i]);
Inc(i);
if (wc and $80) <> 0 then
begin
if i >= SourceBytes then Exit; // incomplete multibyte char
wc := wc and $3F;
if (wc and $20) <> 0 then
begin
c := Byte(Source[i]);
Inc(i);
if (c and $C0) <> $80 then Exit; // malformed trail byte or out of range char
if i >= SourceBytes then Exit; // incomplete multibyte char
wc := (wc shl 6) or (c and $3F);
end;
c := Byte(Source[i]);
Inc(i);
if (c and $C0) <> $80 then Exit; // malformed trail byte
Dest[count] := WideChar((wc shl 6) or (c and $3F));
end
else
Dest[count] := WideChar(wc);
Inc(count);
end;
if count >= MaxDestChars then count := MaxDestChars - 1;
Dest[count] := #0;
end
else
begin
while (i < SourceBytes) do
begin
c := Byte(Source[i]);
Inc(i);
if (c and $80) <> 0 then
begin
if i >= SourceBytes then Exit; // incomplete multibyte char
c := c and $3F;
if (c and $20) <> 0 then
begin
c := Byte(Source[i]);
Inc(i);
if (c and $C0) <> $80 then Exit; // malformed trail byte or out of range char
if i >= SourceBytes then Exit; // incomplete multibyte char
end;
c := Byte(Source[i]);
Inc(i);
if (c and $C0) <> $80 then Exit; // malformed trail byte
end;
Inc(count);
end;
end;
Result := count + 1;
end;
function Utf8Encode(const WS: WideString): UTF8String;
var
L: Integer;
Temp: UTF8String;
begin
Result := "";
if WS = "" then Exit;
SetLength(Temp, Length(WS) * 3); // SetLength includes space for null terminator
L := UnicodeToUtf8(PChar(Temp), Length(Temp) + 1, PWideChar(WS), Length(WS));
if L > 0 then
SetLength(Temp, L - 1)
else
Temp := "";
Result := Temp;
end;
function Utf8Decode(const S: UTF8String): WideString;
var
L: Integer;
Temp: WideString;
begin
Result := "";
if S = "" then Exit;
SetLength(Temp, Length(S));
L := Utf8ToUnicode(PWideChar(Temp), Length(Temp) + 1, PChar(S), Length(S));
if L > 0 then
SetLength(Temp, L - 1)
else
Temp := "";
Result := Temp;
end;
function AnsiToUtf8(const S: string): UTF8String;
begin
Result := Utf8Encode(S);
end;
function Utf8ToAnsi(const S: UTF8String): string;
begin
Result := Utf8Decode(S);
end;
← →
markers © (2006-10-10 16:11) [2]to clickmaker
Спасибо! А может есть у кого кодик определения UTF8 там или Win?
← →
han_malign © (2006-10-10 16:21) [3]
// Scans a string encoded in UTF-8 to verify that it contains
// only valid sequences. It will return 1 if the string contains
// only legitimate encoding sequences; otherwise it will return 0;
// From "Secure Programming Cookbook", John Viega & Matt Messier, 2003
function IsUtf8(aStr: string): boolean;
var fASCII: boolean;
i, nb, len: integer;
b: byte;
pc: PChar;
begin
Result:= aStr <> "";
if(Result)then begin
fASCII:= true; Result:= false;
len:= Length(aStr);
pc:= PChar(aStr);
while(len > 0)do begin
nb:= 0;
b:= Byte(pc^);
if(b and $80 <> 0)then begin
fASCII:= false;
{>>} if((b and $c0) = $80)then exit
else if((b and $e0) = $c0)then nb := 1
else if((b and $f0) = $e0)then nb := 2
else if((b and $f8) = $f0)then nb := 3
else if((b and $fc) = $f8)then nb := 4
else if((b and $fe) = $fc)then nb := 5
else exit;
if(len < nb)then exit;
for i:= 1 to nb do begin
if(Byte(pc[i]) and $C0 <> $80)then exit;
end;
end;
inc(pc, nb+1);
dec(len, nb+1);
end;
Result:= not fASCII;
end;
end;
if(IsUtf8(str))then str:= System.UTF8ToAnsi(UTF8String(str));
← →
markers © (2006-10-10 16:28) [4]to han_malign
Боольшущее спасибо всем :)
Страницы: 1 вся ветка
Форум: "Основная";
Текущий архив: 2006.11.19;
Скачать: [xml.tar.bz2];
Память: 0.47 MB
Время: 0.056 c