From cec3ebb4cb8ff9ed18212ae2c0e255345906fe35 Mon Sep 17 00:00:00 2001
From: Massimo Melina
Date: Sun, 3 May 2020 23:42:33 +0200
Subject: [PATCH] fix: unicode support for VFS files

Text fields stored in a VFS file are now written as UTF-8 whenever they
contain non-ASCII characters. Such fields are marked by adding
TLV_UTF8_FLAG to the TLV type; Ttlv.pop() strips the flag and returns both
the decoded string and the raw bytes. Unflagged fields keep being read
through the plain ansistring-to-string conversion.

- classesLib: Ttlv.pop() now has the (value, raw) signature; new constant
  TLV_UTF8_FLAG.
- utillib: string overloads of TLV()/TLV_NOT_EMPTY(); popTLV() removed.
- hslib: base64encode()/base64decode() work on ansistring and
  base64decodeA() is removed; encodeURL() converts the url to UTF-8 itself
  when nonascii is set.
- main: user:password is saved under the new FK_USERPWD_UTF8 key and is
  UTF8-encoded explicitly before base64encode(), so that it matches the
  UTF8toString() applied when loading. FK_USERPWD is still read.
- main.dfm: encodenonasciiChk no longer has Checked = True.

---
 classesLib.pas | 21 ++++++++++-----
 hslib.pas      | 45 +++++++++++--------------------
 main.dfm       |  1 -
 main.pas       | 73 ++++++++++++++++++++++++++++++--------------------
 utillib.pas    | 28 ++++++++++---------
 5 files changed, 90 insertions(+), 78 deletions(-)

diff --git a/classesLib.pas b/classesLib.pas
index 05f060a..f93c6bd 100644
--- a/classesLib.pas
+++ b/classesLib.pas
@@ -193,12 +193,12 @@ type
   Ttlv = class
   protected
     cur, bound: integer;
-    whole, lastValue: ansistring;
+    whole, lastRaw: ansistring;
     stack: array of integer;
     stackTop: integer;
   public
     procedure parse(data:ansistring);
-    function pop(var value:ansistring):integer;
+    function pop(var value:string; var raw:ansiString):integer;
     function down():boolean;
     function up():boolean;
     function getTotal():integer;
@@ -208,6 +208,8 @@ type
     function getTheRest():ansistring;
   end;
 
+const TLV_UTF8_FLAG = $1000000;
+
 implementation
 
 uses
@@ -1144,7 +1146,7 @@ bound:=length(data);
 stackTop:=0;
 end; // parse
 
-function Ttlv.pop(var value:ansistring):integer;
+function Ttlv.pop(var value:string; var raw:ansistring):integer;
 var
   n: integer;
 begin
@@ -1152,8 +1154,15 @@ result:=-1;
 if isOver() then exit; // finished
 result:=integer((@whole[cur])^);
 n:=Pinteger(@whole[cur+4])^;
-value:=copy(whole, cur+8, n);
-lastValue:=value;
+raw:=copy(whole, cur+8, n);
+lastRaw:=raw;
+if result and TLV_UTF8_FLAG = 0 then
+  value:=string(raw)
+else
+  begin
+  dec(result, TLV_UTF8_FLAG);
+  value:=UTF8toString(raw);
+  end;
 inc(cur, 8+n);
 end; // pop
 
@@ -1174,7 +1183,7 @@ stack[stackTop]:=bound;
 inc(stackTop);
 
 bound:=cur;
-dec(cur, length(lastValue));
+dec(cur, length(lastRaw));
 result:=true;
 end; // down
 
diff --git a/hslib.pas b/hslib.pas
index 25f83c1..e7b9260 100644
--- a/hslib.pas
+++ b/hslib.pas
@@ -313,9 +313,8 @@ function encodeURL(url:string; nonascii:boolean=TRUE; spaces:boolean=TRUE;
 // returns true if address is not suitable for the internet
 function isLocalIP(ip:string):boolean;
 // base64 encoding
-function base64encode(s:string):string;
-function base64decode(s:string):string;
-function base64decodeA(s:string):Tbytes;
+function base64encode(s:ansistring):ansistring;
+function base64decode(s:ansistring):ansistring;
 // an ip address where we are listening
 function getIP():string;
 // ensure a string ends with a specific string
@@ -445,7 +444,7 @@ begin
   until false;
 end; // nonQuotedPos
 
-function base64encode(s:string):string;
+function base64encode(s:ansistring):ansistring;
 const
   TABLE='ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/';
 type
@@ -469,7 +468,7 @@ if length(s) mod 3 > 0 then
     +ifThen(length(s) mod 3=1,'==',TABLE[1+(p[1] and 15) shl 2+p[2] shr 6]+'=');
 end; // base64encode
 
-function base64decode(s:string):string;
+function base64decode(s:ansistring):ansistring;
 const
   TABLE:array[#43..#122] of byte=(
     62,0,0,0,63,52,53,54,55,56,57,58,59,60,61,0,0,0,0,0,0,0,0,1,2,3,4,5,6,7,
@@ -490,31 +489,6 @@ while i <= length(s) do
   end;
 end; // base64decode
 
-function base64decodeA(s:string):Tbytes;
-const
-  TABLE:array[#43..#122] of byte=(
-    62,0,0,0,63,52,53,54,55,56,57,58,59,60,61,0,0,0,0,0,0,0,0,1,2,3,4,5,6,7,
-    8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,0,0,0,0,0,0,26,27,28,
-    29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51);
-var
-  i, n: integer;
-begin
-setLength(result, length(s));
-i:=1;
-n:=0;
-while i <= length(s) do
-  begin
-  result[n]:=TABLE[s[i]] shl 2+TABLE[s[i+1]] shr 4;
-  inc(n);
-  result[n]:=ifThen(s[i+2]<>'=', TABLE[s[i+1]] shl 4+TABLE[s[i+2]] shr 2);
-  inc(n);
-  result[n]:=ifThen(s[i+3]<>'=', TABLE[s[i+2]] shl 6+TABLE[s[i+3]]);
-  inc(n);
-  inc(i,4);
-  end;
-setLength(result, n);
-end; // base64decode
-
 function decodeURL(url:ansistring; utf8:boolean=TRUE):string;
 var
   i, j: integer;
@@ -549,8 +523,11 @@ function encodeURL(url:string; nonascii:boolean=TRUE; spaces:boolean=TRUE;
 var
   i: integer;
   encodePerc, encodeUni: set of char;
+  a: ansistring;
 begin
 result:='';
+if url = '' then
+  exit;
 encodeUni:=[];
 if nonascii then encodeUni:=[#128..#255];
 encodePerc:=[#0..#31,'#','%','?','"','''','&','<','>',':'];
@@ -561,6 +538,14 @@ if not unicode then
   encodePerc:=encodePerc+encodeUni;
   encodeUni:=[];
   end;
+if nonascii then
+  begin
+  a:=UTF8encode(url); // couldn't find a better way to force url to have the UTF8 encoding
+  i:=length(a);
+  setLength(url, i);
+  for i := 1 to i do
+    url[i]:=char(a[i]);
+  end;
 for i:=1 to length(url) do
   if charInSet(url[i], encodePerc) then
     result:=result+'%'+intToHex(ord(url[i]),2)
diff --git a/main.dfm b/main.dfm
index 2ffcdab..9099ad7 100644
--- a/main.dfm
+++ b/main.dfm
@@ -2906,7 +2906,6 @@ object mainFrm: TmainFrm
       object encodenonasciiChk: TMenuItem
         AutoCheck = True
         Caption = 'Encode non-ASCII characters'
-        Checked = True
       end
       object encodePwdUrlChk: TMenuItem
         AutoCheck = True
diff --git a/main.pas b/main.pas
index e759123..e91258e 100644
--- a/main.pas
+++ b/main.pas
@@ -2637,8 +2637,6 @@ end; // getIconForTreeview
 
 function encodeURL(s:string; fullEncode:boolean=FALSE):string;
 begin
-if fullEncode or mainFrm.encodenonasciiChk.checked then
-  s:=ansiToUTF8(s);
 result:=HSlib.encodeURL(s, mainFrm.encodeNonasciiChk.checked,
   fullEncode or mainFrm.encodeSpacesChk.checked)
 end; // encodeURL
@@ -6318,7 +6316,8 @@ type
     E_ZIP:
       begin
       result:=zCompressStr2(s, zcMax);
-      if length(result) > round(0.95*length(s)) then result:=s;
+      if length(result) > round(0.95*length(s)) then
+        result:=s;
       result:=base64encode(result);
       end;
     end;
@@ -6634,9 +6633,14 @@ var
     end;
   end; // loadBanlist
 
-  function unzip(s:string):string;
+  function unzip(s:ansistring):ansistring;
+  var a: Tbytes;
   begin
-  try result:=ZDecompressStr(base64decodeA(s));
+  try
+    s:=base64decode(s);
+    setLength(a, length(s));
+    move(s[1], a[0], length(s));
+    result:=ZDecompressStr(a)
   except end;
   end; // unzip
 
@@ -9115,6 +9119,7 @@ const
   FK_CRC = 6;
   FK_COMMENT = 7;
   FK_USERPWD = 8;
+  FK_USERPWD_UTF8 = 108;
   FK_ADDEDTIME = 9;
   FK_DLCOUNT = 10;
   FK_ROOT = 11;
@@ -9137,7 +9142,7 @@ const
 
 function Tmainfrm.getVFS(node:Ttreenode=NIL):ansistring;
 
-  function getAutoupdatedFiles():string;
+  function getAutoupdatedFiles():ansistring;
   var
     i: integer;
     fn: string;
@@ -9164,7 +9169,7 @@ f:=nodeToFile(node);
 commonFields:=TLV(FK_FLAGS, str_(f.flags))
   +TLV_NOT_EMPTY(FK_RESOURCE, f.resource)
   +TLV_NOT_EMPTY(FK_COMMENT, f.comment)
-  +if_(f.user>'', TLV(FK_USERPWD, base64encode(f.user+':'+f.pwd)))
+  +if_(f.user>'', TLV(FK_USERPWD_UTF8, base64encode(UTF8encode(f.user+':'+f.pwd))))
   +TLV_NOT_EMPTY(FK_ACCOUNTS, join(';',f.accounts[FA_ACCESS]) )
   +TLV_NOT_EMPTY(FK_UPLOADACCOUNTS, join(';',f.accounts[FA_UPLOAD]))
   +TLV_NOT_EMPTY(FK_DELETEACCOUNTS, join(';',f.accounts[FA_DELETE]))
@@ -9211,6 +9216,7 @@ resourcestring
 
 var
   data: ansistring;
+  s: string;
   f: Tfile;
   after: record
     resetLetBrowse: boolean;
@@ -9220,17 +9226,18 @@ var
 
   procedure parseAutoupdatedFiles(data:ansistring);
   var
-    s, fn: ansistring;
+    s, fn: string;
+    raw: ansiString;
   begin
   autoupdatedFiles.Clear();
   tlv.down();
-  while tlv.pop(s) = FK_NODE do
+  while tlv.pop(s,raw) = FK_NODE do
     begin
     tlv.down();
     while not tlv.isOver() do
-      case tlv.pop(s) of
+      case tlv.pop(s,raw) of
         FK_NAME: fn:=s;
-        FK_DLCOUNT: autoupdatedFiles.setInt(fn, int_(s));
+        FK_DLCOUNT: autoupdatedFiles.setInt(fn, int_(raw));
         end;
     tlv.up();
     end;
@@ -9253,7 +9260,7 @@ f.node:=node;
 tlv:=Ttlv.create;
 tlv.parse(vfs);
 while not tlv.isOver() do
-  case tlv.pop(data) of
+  case tlv.pop(s,data) of
     FK_ROOT:
       begin
       setVFS(data, rootNode );
@@ -9278,7 +9285,8 @@ while not tlv.isOver() do
       { I was unable to reproduce the bug, but i found that correct data doesn't complain if i add an extra #0. }
       try
         data:=ZDecompressStr(bytesOf(data+#0));
-        if isAnyMacroIn(data) then loadingVFS.macrosFound:=TRUE;
+        if isAnyMacroIn(data) then
+          loadingVFS.macrosFound:=TRUE;
         setVFS(data, node);
       except msgDlg(MSG_ZLIB, MB_ICONERROR) end;
     FK_FORMAT_VER:
@@ -9304,15 +9312,15 @@ while not tlv.isOver() do
       if msgDlg(MSG_BADCRC+MSG_BETTERSTOP,MB_ICONERROR+MB_YESNO) = IDYES then
         exit;
       end;
-    FK_RESOURCE: f.resource:=data;
+    FK_RESOURCE: f.resource:=s;
     FK_NAME:
       begin
-      f.name:=data;
-      node.text:=data;
+      f.name:=s;
+      node.text:=s;
       end;
     FK_FLAGS: move(data[1], f.flags, length(data));
     FK_ADDEDTIME: f.atime:=dt_(data);
-    FK_COMMENT: f.comment:=data;
+    FK_COMMENT: f.comment:=s;
     FK_USERPWD:
       begin
       data:=base64decode(data);
@@ -9320,17 +9328,24 @@ while not tlv.isOver() do
       f.pwd:=data;
       usersInVFS.track(f.user, f.pwd);
       end;
+    FK_USERPWD_UTF8:
+      begin
+      s:=UTF8toString(base64decode(data));
+      f.user:=chop(':',s);
+      f.pwd:=s;
+      usersInVFS.track(f.user, f.pwd);
+      end;
     FK_DLCOUNT: f.DLcount:=int_(data);
-    FK_ACCOUNTS: f.accounts[FA_ACCESS]:=split(';',data);
-    FK_UPLOADACCOUNTS: f.accounts[FA_UPLOAD]:=split(';',data);
-    FK_DELETEACCOUNTS: f.accounts[FA_DELETE]:=split(';',data);
-    FK_FILESFILTER: f.filesfilter:=data;
-    FK_FOLDERSFILTER: f.foldersfilter:=data;
-    FK_UPLOADFILTER: f.uploadFilterMask:=data;
-    FK_REALM: f.realm:=data;
-    FK_DEFAULTMASK: f.defaultFileMask:=data;
-    FK_DIFF_TPL: f.diffTpl:=data;
-    FK_DONTCOUNTASDOWNLOADMASK: f.dontCountAsDownloadMask:=data;
+    FK_ACCOUNTS: f.accounts[FA_ACCESS]:=split(';',s);
+    FK_UPLOADACCOUNTS: f.accounts[FA_UPLOAD]:=split(';',s);
+    FK_DELETEACCOUNTS: f.accounts[FA_DELETE]:=split(';',s);
+    FK_FILESFILTER: f.filesfilter:=s;
+    FK_FOLDERSFILTER: f.foldersfilter:=s;
+    FK_UPLOADFILTER: f.uploadFilterMask:=s;
+    FK_REALM: f.realm:=s;
+    FK_DEFAULTMASK: f.defaultFileMask:=s;
+    FK_DIFF_TPL: f.diffTpl:=s;
+    FK_DONTCOUNTASDOWNLOADMASK: f.dontCountAsDownloadMask:=s;
     FK_DONTCOUNTASDOWNLOAD: if boolean(data[1]) then include(f.flags, FA_DONT_COUNT_AS_DL); // legacy, now moved into flags
     FK_ICON_GIF: if data > '' then f.setupImage(str2pic(data));
     FK_AUTOUPDATED_FILES: parseAutoupdatedFiles(data);
@@ -9359,7 +9374,8 @@ for act:=low(act) to high(act) do
 if FA_VIS_ONLY_ANON in f.flags then
   loadingVFS.visOnlyAnon:=TRUE;
 
-if f.isVirtualFolder() or f.isLink() then f.mtime:=f.atime;
+if f.isVirtualFolder() or f.isLink() then
+  f.mtime:=f.atime;
 if assigned(f.accounts[FA_UPLOAD]) and (f.resource > '') then
   addString(f.resource, uploadPaths);
 f.setupImage();
@@ -12286,7 +12302,6 @@ var
   dll: HMODULE;
 
 INITIALIZATION
-
 randomize();
 setErrorMode(SEM_FAILCRITICALERRORS);
 exePath:=extractFilePath(ExpandFileName(paramStr(0)));
diff --git a/utillib.pas b/utillib.pas
index 7a57af3..6651ac3 100644
--- a/utillib.pas
+++ b/utillib.pas
@@ -199,9 +199,10 @@ procedure excludeTrailingString(var s:string; ss:string);
 function findEOL(s:string; ofs:integer=1; included:boolean=TRUE):integer;
 function getUniqueName(start:string; exists:TnameExistsFun):string;
 function getStr(from,to_:pchar):string;
-function TLV(t:integer; data:ansistring):ansistring;
-function TLV_NOT_EMPTY(t:integer; data:ansistring):ansistring;
-function popTLV(var s,data:ansistring):integer;
+function TLV(t:integer; s:string):ansistring; overload;
+function TLV(t:integer; data:ansistring):ansistring; overload;
+function TLV_NOT_EMPTY(t:integer; s:string):ansistring; overload;
+function TLV_NOT_EMPTY(t:integer; data:ansistring):ansistring;overload;
 function getCRC(data:ansistring):integer;
 function dotted(i:int64):string;
 function xtpl(src:string; table:array of string):string; overload;
@@ -787,21 +788,24 @@ result.right:=strToInt(chop(',',s));
 result.bottom:=strToInt(chop(',',s));
 end; // strToRect
 
+function TLV(t:integer; s:string):ansistring;
+var
+  raw: ansistring;
+begin
+raw:=UTF8encode(s);
+if length(raw) > length(s) then
+  inc(t, TLV_UTF8_FLAG);
+result:=str_(t)+str_(length(raw))+raw
+end;
+
 function TLV(t:integer; data:ansistring):ansistring;
 begin result:=str_(t)+str_(length(data))+data end;
 
 function TLV_NOT_EMPTY(t:integer; data:ansistring):ansistring;
 begin if data > '' then result:=TLV(t,data) else result:='' end;
 
-// for heavy jobs you are supposed to use class Ttlv
-function popTLV(var s,data:ansistring):integer;
-begin
-result:=-1;
-if length(s) < 8 then exit;
-result:=Pinteger(@s[1])^;
-data:=copy(s,9,Pinteger(@s[5])^);
-delete(s,1,8+length(data));
-end; // popTLV
+function TLV_NOT_EMPTY(t:integer; s:string):ansistring;
+begin if s > '' then result:=TLV(t,s) else result:='' end;
 
 function getCRC(data:ansistring):integer;
 var