/**
 * @name	CeL function for CSV data
 * @fileoverview
 * This file contains functions for processing CSV data.
 * @since
 */


if (typeof CeL === 'function')
CeL.setup_module('data.CSV',
function(library_namespace, load_arguments) {

//	nothing required



/**
 * null module constructor
 * @class	functions for CSV data
 */
CeL.data.CSV
= function() {
	//	null module constructor
};

/**
 * for JSDT: it is only treated as a class when it has a prototype
 */
CeL.data.CSV
.prototype = {
};





/*

TODO:
Process the records one at a time, so the whole data set does not have to be held in memory.
DoEvents

http://hax.pie4.us/2009/05/lesson-of-regexp-50x-faster-with-just.html
GetKeywords: function(str) {
	o: return '\\b(' + str.replace(/\s+/g, '|') + ')\\b';
	x: return '\\b' + str.replace(/\s+/g, '\\b|\\b') + '\\b';
},


http://www.jsdb.org/
jsdb.from_array
jsdb.from_CSV
jsdb.from_CSV_file
jsdb.select=function(
	field	//	[1,0,1,1,1] || '1010100' || 'a,b,c,d' || {a:0,b:1,c:1}
	,where	//	function(o={a:,b:,c:}){..;return select;} || {a:3} || {a:function(a){..;return select;}} || {a://} || {op:'a&&b||c',a:[3,4,6,11],b:[4,5,6],c:32}
	)
jsdb.concat(table1, table2, id filed/[id fileds] = auto detect)
jsdb.from_HTML_TABLE(data,for_every_cell)
jsdb.transpose	//	transpose
jsdb.to_CSV
jsdb.to_HTML_TABLE
jsdb.to_array(row_first)
jsdb.to_object(row_first)

*/

/**
 * Parse CSV text into an array of rows (each row is an array of field strings).
 *
 * Line breaks are normalized to "\n" first. Field delimiters come from
 * parse_CSV.fd, text (quote) delimiters from parse_CSV.td. Inside a quoted
 * field a doubled quote character stands for one literal quote character.
 *
 * @param {String}	_t	CSV text data
 * @param {Boolean}	doCheck	check that the whole text matches the CSV pattern; throws when it does not
 * @param {Boolean}	hasTitle	there's a title line. When omitted, parse_CSV.hasTitle is used (null means: no title).
 * @return	{Array}	[ [L1_1,L1_2,..], [L2_1,L2_2,..],.. ]<br/>
 *		undefined when _t is empty or contains nothing but line breaks.
 * @throws	{Error}	when doCheck is set and the data can not be parsed (error.number is 1)
 * @memberOf	CeL.data.CSV
 * @example
 * //	to use:
 * var data=parse_CSV('~');
 * data[_line_][_field_]
 *
 * //	hasTitle:
 * var data = parse_CSV('~',0,1);
 * //data[_line_][data.t[_title_]]
 *
 * //	then:
 * data.tA	=	title line
 * data.t[_field_name_]	=	field number of title
 * data.it	=	ignored title array
 * data[num]	=	the num-th line (num: 0,1,2,..)
 * @see
 * <a href="http://www.jsdb.org/" accessdate="2010/1/1 0:53">JSDB: JavaScript for databases</a>,
 * <a href="http://hax.pie4.us/2009/05/lesson-of-regexp-50x-faster-with-just.html" accessdate="2010/1/1 0:53">John Hax: A lesson of RegExp: 50x faster with just one line patch</a>
 */
var parse_CSV = function(_t, doCheck, hasTitle) {
	//	Normalize line breaks; nothing to do for empty / line-break-only input.
	if (!_t || !/[^\n]/.test(_t = _t.replace(/\r\n?/g, '\n')))
		return;
	//	(original author's open question: is requiring a trailing [\n] a bug?)

	//	Closure reference instead of arguments.callee, which is forbidden in strict mode.
	var _f = parse_CSV, rows = [], row, doubled = {}, i, fd = _f.fd, td = _f.td,
	pattern, quote, text, fields, field, first, error,

	/*
	Here is a workaround for Opera 10.00 alpha build 1139 bug

	'\u10a0'.match(/[^\u10a1]+/)
	and
	'\u10a0'.match(/[^"]+/)
	gives different result.
	The latter should '\u10a0' but it gives null.

	But
	'\u10a0'.match(/[^"\u109a]+/)
	works.

	*/
	opera_fix = '\u10a0'.match(/[^"]+/) ? '' : '\u109a';

	//	Unquoted field: the first character must be neither a quote nor a delimiter.
	//	The second class deliberately omits td (original note: leaving it out lets more cases be parsed).
	pattern = '((|[^' + td + fd + opera_fix + '\\n][^' + fd + opera_fix + '\\n]*';
	//	One quoted-field alternative per text delimiter.
	for (i = 0; i < td.length; i++) {
		quote = td.charAt(i);
		//	Used later to turn a doubled quote back into a single one.
		doubled[quote] = new RegExp(quote + quote, 'g');
		//	original note: not using [^quote]+ here is much faster
		pattern += '|' + quote + '(([^' + quote + opera_fix + ']|' + quote + quote
				+ '|\\n)*)' + quote;
	}
	//	Every field must be followed by a field delimiter or a line break.
	pattern += ')[' + fd + '\\n])';
	//	With the default settings the pattern is:
	//	((|[^"'\t,;\n][^\t,;\n]*|"(([^"]|""|\n)*)"|'(([^']|''|\n)*)')[\t,;\n])

	//	The pattern needs a terminator after the last field.
	text = _t.slice(-1) == '\n' ? _t : _t + '\n';

	if (doCheck && !new RegExp('^(' + pattern + ')+$').test(text)) {
		//	Standard engines take the FIRST argument of Error() as the message,
		//	so the message is passed alone and the JScript-style number is set separately.
		error = new Error("parse_CSV(): Can't parse data!\npattern: /^" + pattern
				+ "$/g");
		error.number = 1;
		throw error;
	}

	fields = text.match(new RegExp(pattern, 'g')) || [];
	for (row = [], i = 0; i < fields.length; i++) {
		field = fields[i];
		first = field.charAt(0);
		//	Quoted: strip the opening quote, the closing quote and the delimiter, then un-double quotes.
		//	Unquoted: strip the trailing delimiter only.
		row.push(doubled[first] ? field.slice(1, -2).replace(doubled[first], first)
				: field.slice(0, -1));
		//	A field terminated by a line break ends the current row.
		if (field.slice(-1) == '\n') {
			rows.push(row);
			row = [];
		}
	}

	if (typeof hasTitle == 'undefined')
		hasTitle = _f.hasTitle === null ? 0 : _f.hasTitle;
	if (hasTitle && rows.length) {
		//	ignored title array
		rows.it = [];
		//	Guard against a title that spans several lines: skip leading rows
		//	that are shorter than the row following them.
		//	The rows.length test avoids reading rows[0] of an emptied array
		//	when the data consists of the title only.
		while (row = rows.shift(), rows.length && row.length < rows[0].length)
			rows.it.push(row);
		//	Title row and the map from field name to field index.
		rows.tA = row;
		rows.t = {};
		for (i = 0; i < row.length; i++)
			rows.t[row[i]] = i;
	}

	//	rows = [ [L1_1,L1_2,..], [L2_1,L2_2,..],.. ]
	return rows;
};

CeL.data.CSV
.
parse_CSV = parse_CSV;

CeL.data.CSV
.
/**
 * field delimiter (characters of a RegExp character class)
 */
parse_CSV.fd = '\\t,;';//	:\s
CeL.data.CSV
.
/**
 * text delimiter (each character is one possible quote character)
 */
parse_CSV.td = '"\'';
//_.parse_CSV.ld	line delimiter: only \n, \r will be ignored.
CeL.data.CSV
.
/**
 * default for the hasTitle argument. null: no title
 */
parse_CSV.hasTitle = null;
//_.parse_CSV.title_word='t';	//	data[parse_CSV.title_word]=title row array
//_.parse_CSV.fd=';',parse_CSV.td='"',alert(parse_CSV('"dfdf\nsdff";"sdf""sadf\n""as""dfsdf";sdfsadf;"dfsdfdf""dfsadf";sfshgjk',1).join('\n'));WScript.Quit();



//	2007/8/6 17:53:57-22:11:22

/*
test:
'dfgdfg,"fgd",dfg'
'dfgdfg,"fgd",dfg'

'sdfsdf','ssdfdf'',''sdf'

*/
/**
 * Read a CSV file.<br/>
 * !! slow !!
 *
 * The file is read with simpleRead(). Each field is matched by a pattern of the
 * form: optional quote, content, optional quote, delimiter. A delimiter that
 * contains a line break (CR and/or LF) ends the current row.
 * Doubled quotes inside a field are NOT un-doubled.
 *
 * @since	2007/8/6 17:53:57-22:11:22
 * @see	see also JKL.ParseXML.CSV.prototype.parse_CSV	2007/11/4 15:49:4
 * @deprecated	use parse_CSV() instead
 * @param FP	file path
 * @param FD	field delimiter([,;: ]|\s+). Currently ignored: the delimiters are fixed in the patterns below.
 * @param TD	text delimiter['"]. When omitted it is detected from the first line.
 * @param hasTitle	the data has a title line. Then r.t maps each title to its field index.
 *		When not set, index 0 of the result is left empty and the data rows start at index 1.
 * @param enc	encoding passed to simpleRead(); defaults to simpleFileAutodetectEncode
 * @return	Array contains data, or undefined when the text delimiter can not be detected
 */
//readCSVdata[generateCode.dLK]='autodetectEncode,simpleRead,simpleFileAutodetectEncode';
function readCSVdata(FP,FD,TD,hasTitle,enc){
	//	l was an implicit global in the original; it is declared here.
	var t=simpleRead(FP,enc||simpleFileAutodetectEncode).replace(/^[\r\n\s]+/,''),r=[],l,reg={
		//	NOTE(review): the first delimiter class contains a plain space; confirm that a tab was not intended.
		'"':/"?(([^"]+|"")+)"?([,;: ]|[ \r\n]+)/g,
		"'":/'?(([^']+|'')+)'?([,;: ]|[ \r\n]+)/g
	},
	//	true when the line consists of nothing but fields of the given pattern.
	//	Every field needs a trailing delimiter, and the first line is cut off
	//	before its line break, so the line is also tried with "\n" appended.
	is_all_fields=function(line,pattern){
		return !line.replace(pattern,'')||!(line+'\n').replace(pattern,'');
	};
	//	detect delimiter (unfinished sketch, kept for reference)
	/*
	 if(!FD||!TD){
	  var a,b,i=0,F='[,;: \s]',T='[\'"]',r=new RegExp('(^'+(TD||T)+'|('+(TD||T)+')('+(FD||F)+')('+(TD||T)+')|'+(TD||T)+'$)','g');
	  F={},T={};
	  try{
	   t.replace(/(^['"]|(['"])([,;: \s])(['"])|['"]$)/g,function($0,$1,$2,$3,$4){
	    if(!$2)T[$0]=(T[$0]||0)+1;
	    else if($2==$4)T[$2]=(T[$2]||0)+1,F[$3]=(F[$3]||0)+1;
	    if(i++>20)break;
	    return $0;
	   });
	  }catch(e){}
	  if(!FD){a=b=0;for(i in F)if(F[i]>a)a=F[b=i];FD=b;}
	  if(!TD){a=b=0;for(i in T)if(T[i]>a)a=T[b=i];TD=b;}
	 }
	*/
	if(!TD){
		//	Take the first line; fall back to a CR line break when there is no LF.
		l=t.indexOf('\n');
		if(l==-1)l=t.indexOf('\r');
		l=(l==-1?t:t.slice(0,l));
		if(is_all_fields(l,reg['"']))TD='"';
		else if(is_all_fields(l,reg["'"]))TD="'";
		else return;
	}
	reg=reg[TD];

	l=[];
	//	Without a title line, index 0 is left empty.
	if(!hasTitle)r.length=1;
	(t+'\n').replace(reg,function($0,$1,$2,$3){
		l.push($1);
		//	Any line break in the delimiter ends the row (CRLF, LF or CR).
		if(/[\r\n]/.test($3))r.push(l),l=[];
		return '';
	});
	//	Map each title to its field index; skipped when no row was parsed.
	if(hasTitle&&r[0])
		for(l=0,r.t={};l<r[0].length;l++)r.t[r[0][l]]=l;
	return r;
}


toCSV.fd=',';	//	field delimiter
toCSV.td='"';	//	text delimiter
toCSV.force_td=1;	//	whether to always wrap every field in the text delimiter
toCSV.ld='\n';	//	line delimiter
/**
 * Convert an array of rows to CSV text.
 *
 * Uses toCSV.fd, toCSV.td, toCSV.force_td and toCSV.ld.
 * With a text delimiter set, every text delimiter inside a field is doubled.
 * Without one, and with the line delimiter being "\n", line breaks inside a
 * field are written as the two characters backslash + n.
 *
 * @param {Array}	o	rows: [ [field, field, ..], .. ]
 * @param {Array}	[title]	title row, written first. Ignored when it is not an Array.
 * @return	{String}	the rows joined with the line delimiter
 */
function toCSV(o,title){
	//	The function's own name replaces arguments.callee, which is forbidden in strict mode.
	var CSV=[],_f=toCSV,need_escape,to_escape,td=_f.td,escaped=td,i=0,
	//	Escape the fields of one row and append the row to CSV.
	add_row=function(row){
		var j=0,fields=[],wrap;
		for(;j<row.length;j++)
			//	String(): a non-string field whose text needs escaping has no .replace()
			fields.push(need_escape&&need_escape.test(row[j])?String(row[j]).replace(to_escape,escaped):row[j]);
		wrap=_f.force_td?(td||''):'';
		CSV.push(wrap+fields.join(wrap+_f.fd+wrap)+wrap);
	};

	if(escaped)
		//	The text delimiter is escaped by doubling it.
		need_escape=new RegExp('\\'+escaped),to_escape=new RegExp('\\'+escaped,'g'),escaped+=escaped;
	else if(toCSV.ld=='\n')need_escape=/\n/,to_escape=/\n/g,escaped='\\n';
	if(title&&title instanceof Array)add_row(title);

	for(;i<o.length;i++)add_row(o[i]);

	return CSV.join(_f.ld);
}
/*	old:
function quoteCSVfield(t,d){
 if(!d)d='"';
 for(var i=0,j,rd=new RegExp(d,'g'),d2=d+d;i<t.length;i++){
  for(j=0;j<t[i].length;j++)
   if(typeof t[i][j]=='string')t[i][j]=d+t[i][j].replace(rd,d2)+d;
  if(t[i] instanceof Array)t[i]=t[i].join(',');
 }
 return t.join('\n')+'\n';
}
*/







return (
	CeL.data.CSV
);
}


);
