마음의 안정을 찾기 위하여 - URL Decode UTF8
2353864
308
575
관리자새글쓰기
태그위치로그방명록
별일없다의 생각
dawnsea's me2day/2010
색상(RGB)코드 추출기(Color...
Connection Generator/2010
최승호PD, '4대강 거짓말 검...
Green Monkey**/2010
Syng의 생각
syng's me2DAY/2010
천재 작곡가 윤일상이 기획,...
엘븐킹's Digital Factory/2010
URL Decode UTF8
Delphi | 2007/09/18 16:40

출처 :  http://koti.mbnet.fi/akini/delphi/urldecodeutf8/

Always use UTF8 or unicode strings if possible
Java and .NET coders don't have to do anything special, but "native language" coders must take a few extra steps to overcome Unicode problems.
Here is a URLDecodeUTF8(ansistr, widestr) conversion function and an example UI application. I took inspiration from the Indy URLDecode function and converted it into a WideChar-enabled function. Indy's URLDecode supports only plain strings and strings encoded in legacy charsets.

Here are the full sources and an example UI exe to try it out. I created a DLL library because I need to call it from a C++ program. You can copy and paste the code to suit your needs. The test application uses TntUnicode components; see the reference image.
orcoredll.zip sources and exe
dlltest.png reference image


Convert UTF8 encoded ansi string to widestring:

unit orcoredll_unit1;

interface

// Public functions of this unit. They use the "C-style" stdcall calling
// convention for better compatibility with other applications.
//
// All three follow the same buffer protocol: the result is copied into
// the caller-supplied 'buf' and True is returned when it fits; otherwise
// False is returned and 'lenBuf' is used to report a buffer size.

// Converts a null-terminated wide string to an ANSI string.
function WideStringToAnsiString(const pw: PWideChar; const buf: PAnsiChar;
      var lenBuf: Cardinal): boolean; stdcall;
// URL-decodes a UTF-8 encoded string into a wide (unicode) string.
function URLDecodeUTF8(const s: PAnsiChar; const buf: PWideChar;
      var lenBuf: Cardinal): boolean; stdcall;
// URL-decodes a UTF-8 encoded string into an ANSI string.
function URLDecodeUTF8A(const s: PAnsiChar; const buf: PAnsiChar;
      var lenBuf: Cardinal): boolean; stdcall;

implementation

uses SysUtils;

{
  Convert a null-terminated unicode (wide) string to an ANSI string.
  The conversion itself is done by the RTL routine WideCharToString.

  @param pw        wide string to be converted (nil is treated as empty)
  @outparam buf    buffer receiving the null-terminated ANSI result, or
                   nil if the caller only wants to query the size
  @inoutparam lenBuf
                   in:  capacity of 'buf' in AnsiChars, INCLUDING room
                        for the terminating null
                   out: on success, number of AnsiChars stored, not
                        counting the null; on failure, the capacity
                        required (result length + 1 for the null)
  @return          True if the result was copied into 'buf', False if
                   only lenBuf was updated
}
function WideStringToAnsiString(const pw: PWideChar; const buf: PAnsiChar;
      var lenBuf: Cardinal): boolean; stdcall;
var
   sa: AnsiString;
   len: Cardinal;
begin
   sa := WideCharToString(pw);
   len := Length(sa);

   // Strict '<' because the buffer must also hold the trailing null.
   if Assigned(buf) and (len < lenBuf) then begin
      if (len > 0) then
         Move(PAnsiChar(sa)^, buf^, len * SizeOf(AnsiChar));
      buf[len] := #0;
      lenBuf := len;
      Result := True;
   end else begin
      // Tell the caller how big the buffer should be to store all
      // converted characters, including the trailing null. This is
      // reported for an empty result as well (required size 1), so
      // that a False return always carries a usable size.
      lenBuf := len + 1;
      Result := False;
   end;
end;

{
  URL-decode a string whose non-US-ASCII characters were percent-escaped
  as UTF-8 bytes, and return the result as a wide (unicode) string.

     +      = space
     %2A    = *
     %C3%84 = U+00C4 (A with diaeresis)

  A '%' that is not followed by exactly two hex digits is treated as a
  malformed escape: the '%' and up to two following characters are
  dropped from the output (best-effort decoding, no exception raised).

  NOTE(review): the RTL UTF8Decode is documented to return an empty
  string when the byte sequence is not valid UTF-8, so such input yields
  an empty result here - confirm for the compiler version in use.

  @param s         encoded, null-terminated string (nil = empty)
  @outparam buf    buffer for the decoded, null-terminated wide string,
                   or nil if only lenBuf should be updated
  @inoutparam lenBuf
                   in:  capacity of 'buf' in WideChars, INCLUDING room
                        for the terminating null
                   out: on success, number of WideChars stored, not
                        counting the null; on failure, the capacity
                        required (decoded length + 1 for the null)
  @return          True if the string was decoded into 'buf', False if
                   only lenBuf was updated
}
function URLDecodeUTF8(const s: PAnsiChar; const buf: PWideChar;
      var lenBuf: Cardinal): boolean; stdcall;

   // Value of one hexadecimal digit, or -1 if 'ch' is not a hex digit.
   function HexValue(const ch: AnsiChar): Integer;
   begin
      case ch of
         '0'..'9': Result := Ord(ch) - Ord('0');
         'A'..'F': Result := Ord(ch) - Ord('A') + 10;
         'a'..'f': Result := Ord(ch) - Ord('a') + 10;
      else
         Result := -1;
      end;
   end;

var
   // Explicit byte-string types: the decoding below works on single
   // bytes, so it must not depend on what 'String'/'Char' map to in
   // the compiler version being used.
   sAnsi: AnsiString;   // encoded input as a pascal string
   sUtf8: UTF8String;   // unescaped utf8 bytes
   sWide: WideString;   // decoded unicode string

   i, len, srcLen: Cardinal;
   hiNib, loNib: Integer;
begin
   sAnsi := s; // null-terminated str to pascal str
   srcLen := Length(sAnsi);

   // The unescaped form is never longer than the escaped form, so the
   // input length is a safe upper bound; trimmed after the loop.
   SetLength(sUtf8, srcLen);

   i := 1;
   len := 0; // number of bytes written to sUtf8 so far
   while (i <= srcLen) do begin
      if (sAnsi[i] = '%') then begin
         // Only read the two digits when both are really present.
         if (i + 2 <= srcLen) then begin
            hiNib := HexValue(sAnsi[i + 1]);
            loNib := HexValue(sAnsi[i + 2]);
            if (hiNib >= 0) and (loNib >= 0) then begin
               Inc(len);
               sUtf8[len] := AnsiChar(hiNib * 16 + loNib);
            end;
         end;
         // Skip '%' and the two escape characters, valid or not.
         Inc(i, 3);
      end else begin
         Inc(len);
         if (sAnsi[i] = '+') then
            sUtf8[len] := ' '
         else
            sUtf8[len] := sAnsi[i];
         Inc(i);
      end;
   end;
   SetLength(sUtf8, len);

   sWide := UTF8Decode(sUtf8); // utf8 bytes to unicode
   len := Length(sWide);

   // Strict '<' because the buffer must also hold the trailing null.
   if Assigned(buf) and (len < lenBuf) then begin
      if (len > 0) then
         Move(PWideChar(sWide)^, buf^, len * SizeOf(WideChar));
      buf[len] := #0;
      lenBuf := len;
      Result := True;
   end else begin
      // Tell the caller how big the buffer should be to store all
      // decoded characters, including the trailing null. Reported for
      // an empty result as well (required size 1), so that a False
      // return always carries a usable size.
      lenBuf := len + 1;
      Result := False;
   end;
end;

{
  URL-decode a UTF-8 percent-escaped string and return the result as an
  ANSI string. The input is first decoded to a temporary wide string
  with URLDecodeUTF8 and then converted with WideStringToAnsiString.

  @param s         encoded, null-terminated string (nil = empty)
  @outparam buf    buffer for the decoded, null-terminated ANSI string,
                   or nil if only lenBuf should be updated
  @inoutparam lenBuf
                   in:  capacity of 'buf' in AnsiChars, INCLUDING room
                        for the terminating null
                   out: value reported by WideStringToAnsiString:
                        number of AnsiChars stored on success, or the
                        capacity it asks for on failure
  @return          True if the string was decoded into 'buf', False if
                   only lenBuf was updated
}
function URLDecodeUTF8A(const s: PAnsiChar; const buf: PAnsiChar;
      var lenBuf: Cardinal): boolean; stdcall;
var
   src: AnsiString;
   wideCap: Cardinal; // capacity of pw in WideChars, including the null
   wideLen: Cardinal;
   pw: PWideChar;
begin
   // Size the temporary wide buffer from the INPUT, not from the
   // caller's ANSI capacity. Every decoded WideChar consumes at least
   // one input byte, so input length + 1 (for the null) always fits.
   src := s;
   wideCap := Cardinal(Length(src)) + 1;

   // Allocation is in bytes, the capacity handed to URLDecodeUTF8 is
   // in WideChars - keep the two units apart.
   pw := AllocMem(wideCap * SizeOf(WideChar));
   try
      wideLen := wideCap;
      if not URLDecodeUTF8(s, pw, wideLen) then begin
         // Not expected given the sizing above; pass on what the
         // decoder reported.
         lenBuf := wideLen;
         Result := False;
         Exit;
      end;

      // Convert using the caller's real buffer capacity so that the
      // conversion can never write past the end of 'buf'.
      Result := WideStringToAnsiString(pw, buf, lenBuf);
   finally
      FreeMem(pw);
   end;
end;

end.

An external application can then call the DLL functions:

// Demo handler: decodes the text of edtParam in three ways and logs the
// results.
//   1. URLDecodeUTF8          -> wide string, logged to 'memo'
//   2. WideStringToAnsiString -> ANSI copy of (1), logged to 'memo2'
//   3. URLDecodeUTF8A         -> direct ANSI decode, logged to 'memo2'
// In every call 'len' is passed in as the buffer capacity in CHARACTERS
// (including the null) and is overwritten by the callee before it is
// logged.
procedure TForm1.Button2Click(Sender: TObject);
var
   pw: PWideChar;
   pa: PAnsiChar;
   sa: AnsiString;
   sw: WideString;
   len: Cardinal;
begin
   sw := edtParam.Text;
   sa := sw;

   // Capacity in WideChars: the decoded text is never longer than the
   // encoded text, +1 for the null. The allocation is in BYTES, so
   // multiply the whole capacity (null included) by the WideChar size.
   len := Cardinal(Length(sa)) + 1;
   pw := AllocMem(len * SizeOf(WideChar));
   try
      URLDecodeUTF8(PAnsiChar(sa), pw, len);
      sw := pw; // copy PWideChar to WideString
   finally
      FreeMem(pw);
   end;
   memo.Lines.Add(sw + ', len: ' + IntToStr(len));

   // First guess for the ANSI buffer: one byte per character +1 for
   // the null. That is too small when the ANSI code page is
   // multi-byte; in that case the function returns False and stores
   // the required capacity in 'len', so grow the buffer and retry.
   len := Cardinal(Length(sw)) + 1;
   pa := AllocMem(len);
   try
      pw := PWideChar(sw);
      if not WideStringToAnsiString(pw, pa, len) then begin
         ReallocMem(pa, len);
         pa[0] := #0; // reallocated memory is not zero-filled
         WideStringToAnsiString(pw, pa, len);
      end;
      sa := pa; // copy PAnsiChar to AnsiString
   finally
      FreeMem(pa);
   end;
   memo2.Lines.Add(sa + ', len: ' + IntToStr(len));

   // decode directly to AnsiString
   sw := edtParam.Text;
   sa := sw;
   len := Cardinal(Length(sa)) + 1; // capacity in AnsiChars incl. null
   pa := AllocMem(len);
   try
      URLDecodeUTF8A(PAnsiChar(sa), pa, len);
      sa := pa; // copy PAnsiChar to AnsiString
   finally
      FreeMem(pa);
   end;
   memo2.Lines.Add(sa + ', Alen: ' + IntToStr(len));
end;

2007/09/18 16:40 2007/09/18 16:40
Article tag list Go to top
View Comment 0
Trackback URL :: 이 글에는 트랙백을 보낼 수 없습니다
 
 
 
 
: [1] ... [714][715][716][717][718][719][720][721][722] ... [1323] :
«   2024/09   »
1 2 3 4 5 6 7
8 9 10 11 12 13 14
15 16 17 18 19 20 21
22 23 24 25 26 27 28
29 30          
전체 (1323)
출판 준비 (0)
My-Pro... (41)
사는 ... (933)
블로그... (22)
My Lib... (32)
게임 ... (23)
개발관... (3)
Smart ... (1)
Delphi (93)
C Builder (0)
Object... (0)
VC, MF... (10)
Window... (1)
Open API (3)
Visual... (0)
Java, JSP (2)
ASP.NET (0)
PHP (6)
Database (12)
리눅스 (29)
Windows (25)
Device... (1)
Embedded (1)
게임 ... (0)
Web Se... (2)
Web, S... (21)
잡다한... (7)
프로젝트 (0)
Personal (0)
대통령... (13)
Link (2)