fix: unicode support for VFS files

This commit is contained in:
Massimo Melina 2020-05-03 23:42:33 +02:00
parent 384d2d10f4
commit cec3ebb4cb
5 changed files with 90 additions and 78 deletions

View File

@ -193,12 +193,12 @@ type
Ttlv = class Ttlv = class
protected protected
cur, bound: integer; cur, bound: integer;
whole, lastValue: ansistring; whole, lastRaw: ansistring;
stack: array of integer; stack: array of integer;
stackTop: integer; stackTop: integer;
public public
procedure parse(data:ansistring); procedure parse(data:ansistring);
function pop(var value:ansistring):integer; function pop(var value:string; var raw:ansiString):integer;
function down():boolean; function down():boolean;
function up():boolean; function up():boolean;
function getTotal():integer; function getTotal():integer;
@ -208,6 +208,8 @@ type
function getTheRest():ansistring; function getTheRest():ansistring;
end; end;
const TLV_UTF8_FLAG = $1000000;
implementation implementation
uses uses
@ -1144,7 +1146,7 @@ bound:=length(data);
stackTop:=0; stackTop:=0;
end; // parse end; // parse
function Ttlv.pop(var value:ansistring):integer; function Ttlv.pop(var value:string; var raw:ansistring):integer;
var var
n: integer; n: integer;
begin begin
@ -1152,8 +1154,15 @@ result:=-1;
if isOver() then exit; // finished if isOver() then exit; // finished
result:=integer((@whole[cur])^); result:=integer((@whole[cur])^);
n:=Pinteger(@whole[cur+4])^; n:=Pinteger(@whole[cur+4])^;
value:=copy(whole, cur+8, n); raw:=copy(whole, cur+8, n);
lastValue:=value; lastRaw:=raw;
if result and TLV_UTF8_FLAG = 0 then
value:=string(raw)
else
begin
dec(result, TLV_UTF8_FLAG);
value:=UTF8toString(raw);
end;
inc(cur, 8+n); inc(cur, 8+n);
end; // pop end; // pop
@ -1174,7 +1183,7 @@ stack[stackTop]:=bound;
inc(stackTop); inc(stackTop);
bound:=cur; bound:=cur;
dec(cur, length(lastValue)); dec(cur, length(lastRaw));
result:=true; result:=true;
end; // down end; // down

View File

@ -313,9 +313,8 @@ function encodeURL(url:string; nonascii:boolean=TRUE; spaces:boolean=TRUE;
// returns true if address is not suitable for the internet // returns true if address is not suitable for the internet
function isLocalIP(ip:string):boolean; function isLocalIP(ip:string):boolean;
// base64 encoding // base64 encoding
function base64encode(s:string):string; function base64encode(s:ansistring):ansistring;
function base64decode(s:string):string; function base64decode(s:ansistring):ansistring;
function base64decodeA(s:string):Tbytes;
// an ip address where we are listening // an ip address where we are listening
function getIP():string; function getIP():string;
// ensure a string ends with a specific string // ensure a string ends with a specific string
@ -445,7 +444,7 @@ begin
until false; until false;
end; // nonQuotedPos end; // nonQuotedPos
function base64encode(s:string):string; function base64encode(s:ansistring):ansistring;
const const
TABLE='ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'; TABLE='ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/';
type type
@ -469,7 +468,7 @@ if length(s) mod 3 > 0 then
+ifThen(length(s) mod 3=1,'==',TABLE[1+(p[1] and 15) shl 2+p[2] shr 6]+'='); +ifThen(length(s) mod 3=1,'==',TABLE[1+(p[1] and 15) shl 2+p[2] shr 6]+'=');
end; // base64encode end; // base64encode
function base64decode(s:string):string; function base64decode(s:ansistring):ansistring;
const const
TABLE:array[#43..#122] of byte=( TABLE:array[#43..#122] of byte=(
62,0,0,0,63,52,53,54,55,56,57,58,59,60,61,0,0,0,0,0,0,0,0,1,2,3,4,5,6,7, 62,0,0,0,63,52,53,54,55,56,57,58,59,60,61,0,0,0,0,0,0,0,0,1,2,3,4,5,6,7,
@ -490,31 +489,6 @@ while i <= length(s) do
end; end;
end; // base64decode end; // base64decode
// Decodes the base64 text in s and returns the decoded bytes.
// Only whole groups of 4 characters are decoded; a trailing partial group is
// ignored. A '=' in the 3rd or 4th position of a group is treated as padding
// and produces no output byte. Characters outside the lookup table count as 0.
function base64decodeA(s:string):Tbytes;
const
  TABLE:array[#43..#122] of byte=(
    62,0,0,0,63,52,53,54,55,56,57,58,59,60,61,0,0,0,0,0,0,0,0,1,2,3,4,5,6,7,
    8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,0,0,0,0,0,0,26,27,28,
    29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51);
var
  i, n: integer;
  a, b, c, d: integer;

  // 6-bit value of a base64 character, without ever indexing outside TABLE
  function sextet(ch:char):integer;
  begin
  if (ch >= low(TABLE)) and (ch <= high(TABLE)) then
    result:=TABLE[ch]
  else
    result:=0;
  end; // sextet

begin
// worst case is 3 output bytes per 4 input characters; trimmed to n at the end
setLength(result, (length(s) div 4)*3);
i:=1;
n:=0;
// i+3 must be a valid index, so an incomplete last group is never read
while i+3 <= length(s) do
  begin
  a:=sextet(s[i]);
  b:=sextet(s[i+1]);
  c:=sextet(s[i+2]);
  d:=sextet(s[i+3]);
  result[n]:=(a shl 2+b shr 4) and $FF;
  inc(n);
  // padding means the group carries fewer than 3 bytes: emit nothing for it
  if s[i+2] <> '=' then
    begin
    result[n]:=(b shl 4+c shr 2) and $FF;
    inc(n);
    if s[i+3] <> '=' then
      begin
      result[n]:=(c shl 6+d) and $FF;
      inc(n);
      end;
    end;
  inc(i,4);
  end;
setLength(result, n);
end; // base64decodeA
function decodeURL(url:ansistring; utf8:boolean=TRUE):string; function decodeURL(url:ansistring; utf8:boolean=TRUE):string;
var var
i, j: integer; i, j: integer;
@ -549,8 +523,11 @@ function encodeURL(url:string; nonascii:boolean=TRUE; spaces:boolean=TRUE;
var var
i: integer; i: integer;
encodePerc, encodeUni: set of char; encodePerc, encodeUni: set of char;
a: ansistring;
begin begin
result:=''; result:='';
if url = '' then
exit;
encodeUni:=[]; encodeUni:=[];
if nonascii then encodeUni:=[#128..#255]; if nonascii then encodeUni:=[#128..#255];
encodePerc:=[#0..#31,'#','%','?','"','''','&','<','>',':']; encodePerc:=[#0..#31,'#','%','?','"','''','&','<','>',':'];
@ -561,6 +538,14 @@ if not unicode then
encodePerc:=encodePerc+encodeUni; encodePerc:=encodePerc+encodeUni;
encodeUni:=[]; encodeUni:=[];
end; end;
if nonascii then
begin
a:=UTF8encode(url); // couldn't find a better way to force url to have the UTF8 encoding
i:=length(a);
setLength(url, i);
for i := 1 to i do
url[i]:=char(a[i]);
end;
for i:=1 to length(url) do for i:=1 to length(url) do
if charInSet(url[i], encodePerc) then if charInSet(url[i], encodePerc) then
result:=result+'%'+intToHex(ord(url[i]),2) result:=result+'%'+intToHex(ord(url[i]),2)

View File

@ -2906,7 +2906,6 @@ object mainFrm: TmainFrm
object encodenonasciiChk: TMenuItem object encodenonasciiChk: TMenuItem
AutoCheck = True AutoCheck = True
Caption = 'Encode non-ASCII characters' Caption = 'Encode non-ASCII characters'
Checked = True
end end
object encodePwdUrlChk: TMenuItem object encodePwdUrlChk: TMenuItem
AutoCheck = True AutoCheck = True

View File

@ -2637,8 +2637,6 @@ end; // getIconForTreeview
function encodeURL(s:string; fullEncode:boolean=FALSE):string; function encodeURL(s:string; fullEncode:boolean=FALSE):string;
begin begin
if fullEncode or mainFrm.encodenonasciiChk.checked then
s:=ansiToUTF8(s);
result:=HSlib.encodeURL(s, mainFrm.encodeNonasciiChk.checked, result:=HSlib.encodeURL(s, mainFrm.encodeNonasciiChk.checked,
fullEncode or mainFrm.encodeSpacesChk.checked) fullEncode or mainFrm.encodeSpacesChk.checked)
end; // encodeURL end; // encodeURL
@ -6318,7 +6316,8 @@ type
E_ZIP: E_ZIP:
begin begin
result:=zCompressStr2(s, zcMax); result:=zCompressStr2(s, zcMax);
if length(result) > round(0.95*length(s)) then result:=s; if length(result) > round(0.95*length(s)) then
result:=s;
result:=base64encode(result); result:=base64encode(result);
end; end;
end; end;
@ -6634,9 +6633,14 @@ var
end; end;
end; // loadBanlist end; // loadBanlist
function unzip(s:string):string; function unzip(s:ansistring):ansistring;
var a: Tbytes;
begin begin
try result:=ZDecompressStr(base64decodeA(s)); try
s:=base64decode(s);
setLength(a, length(s));
move(s[1], a[0], length(s));
result:=ZDecompressStr(a)
except end; except end;
end; // unzip end; // unzip
@ -9115,6 +9119,7 @@ const
FK_CRC = 6; FK_CRC = 6;
FK_COMMENT = 7; FK_COMMENT = 7;
FK_USERPWD = 8; FK_USERPWD = 8;
FK_USERPWD_UTF8 = 108;
FK_ADDEDTIME = 9; FK_ADDEDTIME = 9;
FK_DLCOUNT = 10; FK_DLCOUNT = 10;
FK_ROOT = 11; FK_ROOT = 11;
@ -9137,7 +9142,7 @@ const
function Tmainfrm.getVFS(node:Ttreenode=NIL):ansistring; function Tmainfrm.getVFS(node:Ttreenode=NIL):ansistring;
function getAutoupdatedFiles():string; function getAutoupdatedFiles():ansistring;
var var
i: integer; i: integer;
fn: string; fn: string;
@ -9164,7 +9169,7 @@ f:=nodeToFile(node);
commonFields:=TLV(FK_FLAGS, str_(f.flags)) commonFields:=TLV(FK_FLAGS, str_(f.flags))
+TLV_NOT_EMPTY(FK_RESOURCE, f.resource) +TLV_NOT_EMPTY(FK_RESOURCE, f.resource)
+TLV_NOT_EMPTY(FK_COMMENT, f.comment) +TLV_NOT_EMPTY(FK_COMMENT, f.comment)
+if_(f.user>'', TLV(FK_USERPWD, base64encode(f.user+':'+f.pwd))) +if_(f.user>'', TLV(FK_USERPWD_UTF8, base64encode(f.user+':'+f.pwd)))
+TLV_NOT_EMPTY(FK_ACCOUNTS, join(';',f.accounts[FA_ACCESS]) ) +TLV_NOT_EMPTY(FK_ACCOUNTS, join(';',f.accounts[FA_ACCESS]) )
+TLV_NOT_EMPTY(FK_UPLOADACCOUNTS, join(';',f.accounts[FA_UPLOAD])) +TLV_NOT_EMPTY(FK_UPLOADACCOUNTS, join(';',f.accounts[FA_UPLOAD]))
+TLV_NOT_EMPTY(FK_DELETEACCOUNTS, join(';',f.accounts[FA_DELETE])) +TLV_NOT_EMPTY(FK_DELETEACCOUNTS, join(';',f.accounts[FA_DELETE]))
@ -9211,6 +9216,7 @@ resourcestring
var var
data: ansistring; data: ansistring;
s: string;
f: Tfile; f: Tfile;
after: record after: record
resetLetBrowse: boolean; resetLetBrowse: boolean;
@ -9220,17 +9226,18 @@ var
procedure parseAutoupdatedFiles(data:ansistring); procedure parseAutoupdatedFiles(data:ansistring);
var var
s, fn: ansistring; s, fn: string;
raw: ansiString;
begin begin
autoupdatedFiles.Clear(); autoupdatedFiles.Clear();
tlv.down(); tlv.down();
while tlv.pop(s) = FK_NODE do while tlv.pop(s,raw) = FK_NODE do
begin begin
tlv.down(); tlv.down();
while not tlv.isOver() do while not tlv.isOver() do
case tlv.pop(s) of case tlv.pop(s,raw) of
FK_NAME: fn:=s; FK_NAME: fn:=s;
FK_DLCOUNT: autoupdatedFiles.setInt(fn, int_(s)); FK_DLCOUNT: autoupdatedFiles.setInt(fn, int_(raw));
end; end;
tlv.up(); tlv.up();
end; end;
@ -9253,7 +9260,7 @@ f.node:=node;
tlv:=Ttlv.create; tlv:=Ttlv.create;
tlv.parse(vfs); tlv.parse(vfs);
while not tlv.isOver() do while not tlv.isOver() do
case tlv.pop(data) of case tlv.pop(s,data) of
FK_ROOT: FK_ROOT:
begin begin
setVFS(data, rootNode ); setVFS(data, rootNode );
@ -9278,7 +9285,8 @@ while not tlv.isOver() do
{ I was unable to reproduce the bug, but i found that correct data doesn't complain if i add an extra #0. } { I was unable to reproduce the bug, but i found that correct data doesn't complain if i add an extra #0. }
try try
data:=ZDecompressStr(bytesOf(data+#0)); data:=ZDecompressStr(bytesOf(data+#0));
if isAnyMacroIn(data) then loadingVFS.macrosFound:=TRUE; if isAnyMacroIn(data) then
loadingVFS.macrosFound:=TRUE;
setVFS(data, node); setVFS(data, node);
except msgDlg(MSG_ZLIB, MB_ICONERROR) end; except msgDlg(MSG_ZLIB, MB_ICONERROR) end;
FK_FORMAT_VER: FK_FORMAT_VER:
@ -9304,15 +9312,15 @@ while not tlv.isOver() do
if msgDlg(MSG_BADCRC+MSG_BETTERSTOP,MB_ICONERROR+MB_YESNO) = IDYES then if msgDlg(MSG_BADCRC+MSG_BETTERSTOP,MB_ICONERROR+MB_YESNO) = IDYES then
exit; exit;
end; end;
FK_RESOURCE: f.resource:=data; FK_RESOURCE: f.resource:=s;
FK_NAME: FK_NAME:
begin begin
f.name:=data; f.name:=s;
node.text:=data; node.text:=s;
end; end;
FK_FLAGS: move(data[1], f.flags, length(data)); FK_FLAGS: move(data[1], f.flags, length(data));
FK_ADDEDTIME: f.atime:=dt_(data); FK_ADDEDTIME: f.atime:=dt_(data);
FK_COMMENT: f.comment:=data; FK_COMMENT: f.comment:=s;
FK_USERPWD: FK_USERPWD:
begin begin
data:=base64decode(data); data:=base64decode(data);
@ -9320,17 +9328,24 @@ while not tlv.isOver() do
f.pwd:=data; f.pwd:=data;
usersInVFS.track(f.user, f.pwd); usersInVFS.track(f.user, f.pwd);
end; end;
FK_USERPWD_UTF8:
begin
s:=UTF8toString(base64decode(data));
f.user:=chop(':',s);
f.pwd:=s;
usersInVFS.track(f.user, f.pwd);
end;
FK_DLCOUNT: f.DLcount:=int_(data); FK_DLCOUNT: f.DLcount:=int_(data);
FK_ACCOUNTS: f.accounts[FA_ACCESS]:=split(';',data); FK_ACCOUNTS: f.accounts[FA_ACCESS]:=split(';',s);
FK_UPLOADACCOUNTS: f.accounts[FA_UPLOAD]:=split(';',data); FK_UPLOADACCOUNTS: f.accounts[FA_UPLOAD]:=split(';',s);
FK_DELETEACCOUNTS: f.accounts[FA_DELETE]:=split(';',data); FK_DELETEACCOUNTS: f.accounts[FA_DELETE]:=split(';',s);
FK_FILESFILTER: f.filesfilter:=data; FK_FILESFILTER: f.filesfilter:=s;
FK_FOLDERSFILTER: f.foldersfilter:=data; FK_FOLDERSFILTER: f.foldersfilter:=s;
FK_UPLOADFILTER: f.uploadFilterMask:=data; FK_UPLOADFILTER: f.uploadFilterMask:=s;
FK_REALM: f.realm:=data; FK_REALM: f.realm:=s;
FK_DEFAULTMASK: f.defaultFileMask:=data; FK_DEFAULTMASK: f.defaultFileMask:=s;
FK_DIFF_TPL: f.diffTpl:=data; FK_DIFF_TPL: f.diffTpl:=s;
FK_DONTCOUNTASDOWNLOADMASK: f.dontCountAsDownloadMask:=data; FK_DONTCOUNTASDOWNLOADMASK: f.dontCountAsDownloadMask:=s;
FK_DONTCOUNTASDOWNLOAD: if boolean(data[1]) then include(f.flags, FA_DONT_COUNT_AS_DL); // legacy, now moved into flags FK_DONTCOUNTASDOWNLOAD: if boolean(data[1]) then include(f.flags, FA_DONT_COUNT_AS_DL); // legacy, now moved into flags
FK_ICON_GIF: if data > '' then f.setupImage(str2pic(data)); FK_ICON_GIF: if data > '' then f.setupImage(str2pic(data));
FK_AUTOUPDATED_FILES: parseAutoupdatedFiles(data); FK_AUTOUPDATED_FILES: parseAutoupdatedFiles(data);
@ -9359,7 +9374,8 @@ for act:=low(act) to high(act) do
if FA_VIS_ONLY_ANON in f.flags then if FA_VIS_ONLY_ANON in f.flags then
loadingVFS.visOnlyAnon:=TRUE; loadingVFS.visOnlyAnon:=TRUE;
if f.isVirtualFolder() or f.isLink() then f.mtime:=f.atime; if f.isVirtualFolder() or f.isLink() then
f.mtime:=f.atime;
if assigned(f.accounts[FA_UPLOAD]) and (f.resource > '') then if assigned(f.accounts[FA_UPLOAD]) and (f.resource > '') then
addString(f.resource, uploadPaths); addString(f.resource, uploadPaths);
f.setupImage(); f.setupImage();
@ -12286,7 +12302,6 @@ var
dll: HMODULE; dll: HMODULE;
INITIALIZATION INITIALIZATION
randomize(); randomize();
setErrorMode(SEM_FAILCRITICALERRORS); setErrorMode(SEM_FAILCRITICALERRORS);
exePath:=extractFilePath(ExpandFileName(paramStr(0))); exePath:=extractFilePath(ExpandFileName(paramStr(0)));

View File

@ -199,9 +199,10 @@ procedure excludeTrailingString(var s:string; ss:string);
function findEOL(s:string; ofs:integer=1; included:boolean=TRUE):integer; function findEOL(s:string; ofs:integer=1; included:boolean=TRUE):integer;
function getUniqueName(start:string; exists:TnameExistsFun):string; function getUniqueName(start:string; exists:TnameExistsFun):string;
function getStr(from,to_:pchar):string; function getStr(from,to_:pchar):string;
function TLV(t:integer; data:ansistring):ansistring; function TLV(t:integer; s:string):ansistring; overload;
function TLV_NOT_EMPTY(t:integer; data:ansistring):ansistring; function TLV(t:integer; data:ansistring):ansistring; overload;
function popTLV(var s,data:ansistring):integer; function TLV_NOT_EMPTY(t:integer; s:string):ansistring; overload;
function TLV_NOT_EMPTY(t:integer; data:ansistring):ansistring;overload;
function getCRC(data:ansistring):integer; function getCRC(data:ansistring):integer;
function dotted(i:int64):string; function dotted(i:int64):string;
function xtpl(src:string; table:array of string):string; overload; function xtpl(src:string; table:array of string):string; overload;
@ -787,21 +788,24 @@ result.right:=strToInt(chop(',',s));
result.bottom:=strToInt(chop(',',s)); result.bottom:=strToInt(chop(',',s));
end; // strToRect end; // strToRect
// Builds one TLV record from a unicode string.
// The payload is the UTF-8 encoding of s. When that encoding is longer than s
// itself, TLV_UTF8_FLAG is added to the type so that the reader (Ttlv.pop) knows
// the payload must be decoded as UTF-8 instead of being taken as a plain ansistring.
// The record is str_(type)+str_(payload length)+payload.
// NOTE(review): str_ is presumably the 4-byte binary form of an integer, as
// Ttlv.pop reads type and length as two consecutive integers - confirm.
function TLV(t:integer; s:string):ansistring;
var
  payload: ansistring;
  tag: integer;
begin
payload:=UTF8encode(s);
tag:=t;
if length(payload) > length(s) then
  tag:=tag+TLV_UTF8_FLAG;
result:=str_(tag)+str_(length(payload))+payload;
end; // TLV
function TLV(t:integer; data:ansistring):ansistring; function TLV(t:integer; data:ansistring):ansistring;
begin result:=str_(t)+str_(length(data))+data end; begin result:=str_(t)+str_(length(data))+data end;
function TLV_NOT_EMPTY(t:integer; data:ansistring):ansistring; function TLV_NOT_EMPTY(t:integer; data:ansistring):ansistring;
begin if data > '' then result:=TLV(t,data) else result:='' end; begin if data > '' then result:=TLV(t,data) else result:='' end;
// for heavy jobs you are supposed to use class Ttlv function TLV_NOT_EMPTY(t:integer; s:string):ansistring;
function popTLV(var s,data:ansistring):integer; begin if s > '' then result:=TLV(t,s) else result:='' end;
begin
result:=-1;
if length(s) < 8 then exit;
result:=Pinteger(@s[1])^;
data:=copy(s,9,Pinteger(@s[5])^);
delete(s,1,8+length(data));
end; // popTLV
function getCRC(data:ansistring):integer; function getCRC(data:ansistring):integer;
var var