1 
  2 /**
  3  * @name	CeL function for CSV data
  4  * @fileoverview
  5  * 本檔案包含了處理 CSV data 的 functions。
  6  * @since	
  7  */
  8 
  9 
 10 if (typeof CeL === 'function')
 11 CeL.setup_module('data.CSV',
 12 function(library_namespace, load_arguments) {
 13 
 14 //	nothing required
 15 
 16 
 17 
 18 /**
 19  * null module constructor
 20  * @class	CSV data 的 functions
 21  */
 22 CeL.data.CSV
 23 = function() {
 24 	//	null module constructor
 25 };
 26 
 27 /**
 28  * for JSDT: 有 prototype 才會將之當作 Class
 29  */
 30 CeL.data.CSV
 31 .prototype = {
 32 };
 33 
 34 
 35 
 36 
 37 
 38 /*
 39 
 40 TODO:
 41 可一筆一筆處理,不佔記憶體。
 42 DoEvents
 43 
 44 http://hax.pie4.us/2009/05/lesson-of-regexp-50x-faster-with-just.html
 45 GetKeywords: function(str) {
 46  o: return '\\b(' + str.replace(/\s+/g, '|') + ')\\b';
 47  x: return '\\b' + str.replace(/\s+/g, '\\b|\\b') + '\\b';
 48 },
 49 
 50 
 51 http://www.jsdb.org/
 52 jsdb.from_array
 53 jsdb.from_CSV
 54 jsdb.from_CSV_file
 55 jsdb.select=function(
 56 	field	//	[1,0,1,1,1] || '1010100' || 'a,b,c,d' || {a:0,b:1,c:1}
 57 	,where	//	function(o={a:,b:,c:}){..;return select;} || {a:3} || {a:function(a){..;return select;}} || {a://} || {op:'a&&b||c',a:[3,4,6,11],b:[4,5,6],c:32}
 58 	)
jsdb.concat(table1, table2, id field/[id fields] = auto detect)
 60 jsdb.from_HTML_TABLE(data,for_every_cell)
 61 jsdb.transpose	//	轉置
 62 jsdb.to_CSV
 63 jsdb.to_HTML_TABLE
 64 jsdb.to_array(row_first)
 65 jsdb.to_object(row_first)
 66 
 67 */
 68 
 69 CeL.data.CSV
 70 .
 71 /**
 72  * parse CSV data to JSON	讀入 CSV 檔
 73  * @param {String} _t	CSV text data
 74  * @param {Boolean} doCheck check if data is valid
 75  * @param {Boolean} hasTitle	there's a title line
 76  * @return	{Array}	[ [L1_1,L1_2,..], [L2_1,L2_2,..],.. ]
 77  * @memberOf	CeL.data.CSV
 78  * @example
 79  * //	to use:
 80  * var data=parse_CSV('~');
 81  * data[_line_][_field_]
 82  *
 83  * //	hasTitle:
 84  * var data = parse_CSV('~',0,1);
 85  * //data[_line_][data.t[_title_]]
 86  *
 87  * //	then:
 88  * data.tA	=	title line
 89  * data.t[_field_name_]	=	field number of title
 90  * data.it	=	ignored title array
 91  * data[num]	=	the num-th line (num: 0,1,2,..)
 92  * @see
 93  * <a href="http://www.jsdb.org/" accessdate="2010/1/1 0:53">JSDB: JavaScript for databases</a>,
 94  * <a href="http://hax.pie4.us/2009/05/lesson-of-regexp-50x-faster-with-just.html" accessdate="2010/1/1 0:53">John Hax: A lesson of RegExp: 50x faster with just one line patch</a>
 95  */
 96 parse_CSV = function(_t, doCheck, hasTitle) {
 97 	if (!_t || !/[^\n]/.test(_t = _t.replace(/\r\n?/g, '\n')))
 98 		return;
 99 	//_t+=_t.slice(-1)!='\n\n'?'\n':'\n';//if(_t.slice(-1)!='\n')_t+='\n';//if(!/\n/.test(_t))_t+='\n';	//	後面一定要[\n]是bug?
100 
101 	var _f = arguments.callee, _r = [], _a, _b = {}, _i = 0, _m = _f.fd
102 
103 /*
104 Here is a workaround for Opera 10.00 alpha build 1139 bug
105 
106 '\u10a0'.match(/[^\u10a1]+/)
107 and
108 '\u10a0'.match(/[^"]+/)
109 gives different result.
110 The latter should '\u10a0' but it gives null.
111 
112 But
113 '\u10a0'.match(/[^"\u109a]+/)
114 works.
115 
116 */
117 	, c = '\u10a0'.match(/[^"]+/) ? '' : '\u109a'
118 	;
119 
120 
121 	for (_m = '((|[^' + _f.td + _m
122 			// +c: for Opera bug
123 			+ c
124 			+ '\\n][^' + _m
125 			// +c: for Opera bug
126 			+ c
127 			+ '\\n]*'; _i <
128 			// 這裡不加  _f.td 可以 parse 更多狀況
129 			_f.td.length; _i++)
130 		_a = _f.td.charAt(_i), _b[_a] = new RegExp(_a + _a, 'g'), _m += '|'
131 			+ _a + '(([^' + _a
132 			// +c: for Opera bug
133 			+ c
134 			// 不用 [^'+_a+']+| 快很多
135 			+ ']|' + _a + _a + '|\\n)*)' + _a;
136 	_m += ')[' + _f.fd + '\\n])';
137 /*
138  _m=
139 	'((|[^\'"'+_m+'\\n][^'+_m+'\\n]*|"((""|[^"]|\\n)*)"|\'((\'\'|[^\']|\\n)*)\')['+_m+'\\n])'
140 	'((|[^\'"'+_m+'\\n$][^'+_m+'\\n$]*|"((""|[^"]|\\n)*)"|\'((\'\'|[^\']|\\n)*)\')['+_m+'\\n$])'
141 _a='((|[^"\''+_f.fd+'\\n][^'+_f.fd+'\\n]*|"((""|[^"]|\\n)*)"|\'((\'\'|[^\']|\\n)*)\')['+_f.fd+'\\n])',alert(_m+'\n'+_a+'\n'+(_m==_a));
142 */
143 	//alert( 'now:\n' + new RegExp(_m,'g').source + '\n\nfull:\n' + /((|[^'",;\t\n$][^,;\t\n$]*|'((''|[^']|\n)*)'|"((""|[^"]|\n)*)")[,;\t\n$])/.source);
144 	if (doCheck
145 			&& !new RegExp('^(' + _m + ')+$').test(_t.slice(-1) == '\n' ? _t
146 					: _t + '\n'))
147 		throw new Error(1, "parse_CSV(): Can't parse data!\npattern: /^" + _m
148 				+ "$/g");
149 
150 	for (_a = [], _i = 0, _m = (_t.slice(-1) == '\n' ? _t : _t + '\n')
151 			.match(new RegExp(_m, 'g')); _i < _m.length; _i++) {
152 		_a.push(_b[_t = _m[_i].charAt(0)] ? _m[_i].slice(1, -2).replace(_b[_t],
153 				_t) : _m[_i].slice(0, -1));
154 		//alert('['+_i+'] '+_m[_i]+'|\n'+_a.slice(-1));
155 		if (_m[_i].slice(-1) == '\n')
156 			_r.push(_a), _a = [];
157 	}
158 	//if(_a.length)_r.push(_a);
159 
160 	if (typeof hasTitle == 'undefined')
161 		hasTitle = _f.hasTitle === null ? 0 : _f.hasTitle;
162 	if (hasTitle) {
163 		// ignored title array
164 		_r.it = [];
165 		while (_a = _r.shift(), _a.length < _r[0].length)
166 			// 預防 title 有許多行
167 			_r.it.push(_a);
168 		for (_r.tA = _a, _b = _r.t = {}, _i = 0; _i < _a.length; _i++)
169 			_b[_a[_i]] = _i;
170 	}
171 
172 	// _r=[ [L1_1,L1_2,..], [L2_1,L2_2,..],.. ]
173 	return _r;
174 };
175 
/**
 * Field delimiters, written as the inside of a RegExp character class.
 * (The original note lists ":" and "\s" next to this value - presumably
 * further candidates that are not enabled.)
 */
CeL.data.CSV.parse_CSV.fd = '\\t,;';

/**
 * Text delimiters: each character of this string may quote a field.
 */
CeL.data.CSV.parse_CSV.td = '"\'';

// There is no parse_CSV.ld: the line delimiter is always \n, \r is ignored.

/**
 * Default for the hasTitle argument; null: auto detect.. no title
 */
CeL.data.CSV.parse_CSV.hasTitle = null;
195 //_.parse_CSV.title_word='t';	//	data[parse_CSV.title_word]=title row array
196 //_.parse_CSV.fd=';',parse_CSV.td='"',alert(parse_CSV('"dfdf\nsdff";"sdf""sadf\n""as""dfsdf";sdfsadf;"dfsdfdf""dfsadf";sfshgjk',1).join('\n'));WScript.Quit();
197 
198 
199 
200 //	2007/8/6 17:53:57-22:11:22
201 
202 /*
203 test:
204 'dfgdfg,"fgd",dfg'
205 'dfgdfg,"fgd",dfg'
206 
207 'sdfsdf','ssdfdf'',''sdf'
208 
209 */
/**
 * Read a CSV file into an array of rows.<br/>
 * !! slow !!
 *
 * Without hasTitle the result starts with one empty slot, so the first data
 * row is found at index 1. With hasTitle the first row is the title row and
 * the result gets a property t mapping each title to its column index.
 * Doubled text delimiters inside a field are NOT unescaped.
 *
 * @since 2007/8/6 17:53:57-22:11:22
 * @see JKL.ParseXML.CSV.prototype.parse_CSV (2007/11/4 15:49:4) may serve as a reference
 * @deprecated use parse_CSV() instead
 * @param FP file path
 * @param FD field delimiter([,;:	]|\s+); currently unused, the delimiters are fixed in the patterns below
 * @param TD text delimiter['"]; detected from the first line when omitted
 * @param hasTitle the data has a title line
 * @param enc encoding handed to simpleRead(); defaults to simpleFileAutodetectEncode
 * @return Array contains data; undefined when TD is omitted and can not be detected
 */
//readCSVdata[generateCode.dLK]='autodetectEncode,simpleRead,simpleFileAutodetectEncode';
function readCSVdata(FP,FD,TD,hasTitle,enc){
	var t = simpleRead(FP, enc || simpleFileAutodetectEncode).replace(/^[\r\n\s]+/, ''),
	r = [], l, i,
	// text delimiter => pattern of one field ($1) plus the delimiter that
	// follows it ($3)
	reg = {
		'"' : /"?(([^"]+|"")+)"?([,;:\t]|[ \r\n]+)/g,
		"'" : /'?(([^']+|'')+)'?([,;:\t]|[ \r\n]+)/g
	},
	// true when the pattern consumes the whole sample line. The sample has
	// lost its line break, so it is also tried with one appended: otherwise
	// its last field would lack the delimiter the pattern asks for.
	consumes_line = function(pattern) {
		return !l.replace(pattern, '') || !(l + '\n').replace(pattern, '');
	};

	// detect the text delimiter from the first line
	if (!TD) {
		l = t.indexOf('\n');
		if (l == -1)
			l = t.indexOf('\r');
		l = l == -1 ? t : t.slice(0, l);
		if (consumes_line(reg['"']))
			TD = '"';
		else if (consumes_line(reg["'"]))
			TD = "'";
		else
			return;
	}
	reg = reg[TD];

	// l now collects the fields of the row being read
	l = [];
	if (!hasTitle)
		r.length = 1;
	(t + '\n').replace(reg, function($0, $1, $2, $3) {
		l.push($1);
		// any line break inside the delimiter ends the row (\n, \r or \r\n)
		if (/[\r\n]/.test($3)) {
			r.push(l);
			l = [];
		}
		return '';
	});

	if (hasTitle) {
		r.t = {};
		// nothing to index when not a single row was read
		if (r.length)
			for (i = 0; i < r[0].length; i++)
				r.t[r[0][i]] = i;
	}
	return r;
}
265 
266 
toCSV.fd=',';	//	field delimiter
toCSV.td='"';	//	text delimiter
toCSV.force_td=1;	//	whether every field is wrapped in the text delimiter, needed or not
toCSV.ld='\n';	//	line delimiter
/**
 * Serialize rows to CSV text.
 *
 * The output is controlled by the properties of toCSV: fd (field
 * delimiter), td (text delimiter), ld (line delimiter) and force_td.
 * A text delimiter inside a field is escaped by doubling it. When force_td
 * is off, a field is still wrapped in the text delimiter if it contains the
 * text delimiter, the field delimiter, the line delimiter or a line break.
 * Without any text delimiter, and with ld being '\n', line breaks inside a
 * field are written as the two characters "\n".
 *
 * @param {Array} o	rows; each row is an array of field values. null and undefined fields are written as empty text, other values are converted to strings.
 * @param {Array} [title]	title row, written before the rows of o
 * @return {String} the rows joined by the line delimiter (no trailing line delimiter)
 */
function toCSV(o,title){
	// read the settings from the function itself (arguments.callee is not
	// available in strict mode code)
	var fd = toCSV.fd, td = toCSV.td, ld = toCSV.ld,
	wrap_all = toCSV.force_td && td ? true : false,
	lines = [], index;

	// true when part is not empty and occurs in text
	function has(text, part) {
		return !!part && text.indexOf(part) != -1;
	}

	// replace every occurrence of a literal string
	function replace_all(text, from, to) {
		return text.split(from).join(to);
	}

	// escape one field; wrap it when it needs wrapping and not every field
	// is wrapped anyway
	function encode(value) {
		var text = value == null ? '' : String(value), wrap;
		if (!td)
			return ld == '\n' ? replace_all(text, '\n', '\\n') : text;
		wrap = !wrap_all
				&& (has(text, td) || has(text, fd) || has(text, ld)
						|| has(text, '\n') || has(text, '\r'));
		text = replace_all(text, td, td + td);
		return wrap ? td + text + td : text;
	}

	function add_row(row) {
		var fields = [], k = 0;
		for (; k < row.length; k++)
			fields.push(encode(row[k]));
		lines.push(wrap_all ? td + fields.join(td + fd + td) + td
				: fields.join(fd));
	}

	if (title instanceof Array)
		add_row(title);

	// a missing o yields no rows instead of throwing
	for (index = 0; o && index < o.length; index++)
		add_row(o[index]);

	return lines.join(ld);
}
288 /*	old:
289 function quoteCSVfield(t,d){
290  if(!d)d='"';
291  for(var i=0,j,rd=new RegExp(d,'g'),d2=d+d;i<t.length;i++){
292   for(j=0;j<t[i].length;j++)
293    if(typeof t[i][j]=='string')t[i][j]=d+t[i][j].replace(rd,d2)+d;
294   if(t[i] instanceof Array)t[i]=t[i].join(',');
295  }
296  return t.join('\n')+'\n';
297 }
298 */
299 
300 
301 
302 
303 
304 
305 
306 return (
307 	CeL.data.CSV
308 );
309 }
310 
311 
312 );
313 
314