Qore CsvUtil Module Reference  1.7.2
AbstractCsvIterator.qc.dox.h
1 // -*- mode: c++; indent-tabs-mode: nil -*-
2 // Qore AbstractCsvIterator class definition
3 
4 /* AbstractCsvIterator.qc Copyright 2012 - 2020 Qore Technologies, s.r.o.
5 
6  Permission is hereby granted, free of charge, to any person obtaining a
7  copy of this software and associated documentation files (the "Software"),
8  to deal in the Software without restriction, including without limitation
9  the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  and/or sell copies of the Software, and to permit persons to whom the
11  Software is furnished to do so, subject to the following conditions:
12 
13  The above copyright notice and this permission notice shall be included in
14  all copies or substantial portions of the Software.
15 
16  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22  DEALINGS IN THE SOFTWARE.
23 */
24 
25 // assume local var scope, do not use "$" for vars, members, and method calls
26 
28 namespace CsvUtil {
     // the CsvUtil namespace: all classes used in the CsvUtil module should be inside this namespace
30 
285 class AbstractCsvIterator : public Qore::AbstractIterator, protected CsvHelper {
     // abstract base class that allows abstract CSV data to be iterated
286 
287 public:
288 protected:
     // valid options for the object (a hash for quick lookups of valid keys)
290  const Options = ...;
291 
292 
293  // field separator
294  string separator = ",";
295 
296  // field content delimiter
297  string quote = "\"";
298 
299  // number of header lines
300  softint headerLines = 0;
301 
302  // flag to use string names from the first header row if possible
303  bool headerNames = False;
304 
305  // True if empty lines should be ignored
306  bool ignoreEmptyLines = True;
307 
308  // Flag to trim the field content (trim leading and trailing whitespace) from unquoted fields
309  bool ignoreWhitespace = True;
310 
311  // the @ref Qore::TimeZone to use when parsing dates (default: current time zone)
312  *TimeZone timezone;
313 
314  // verify the column count for every row; if a row does not match, then throw a \c CSVFILEITERATOR-DATA-ERROR exception
315  bool checkElementCounts = False;
316 
317  // getRecord/getValue returns extended hash
318  bool extendedRecord = False;
319 
320  // force "*string" fields with no value to return an empty string rather than @ref nothing for backwards compatibility with very early versions of CsvUtil
321  bool compat_force_empty_string = False;
322 
323  // read ahead flag
324  bool read_ahead;
325 
326  // column count for verifying column counts
327  int cc;
328 
329  // current record count for the index() method
330  int rc = 0;
331 
332  // to resolve record type by rules
333  hash<string, hash<string, list<hash<auto>>>> m_resolve_by_rule;
334 
335  // to resolve record type by number of fields
336  hash<string, list<string>> m_resolve_by_count;
337 
338  // list of idx to field transformations, in order of spec
339  hash<string, list<string>> m_resolve_by_idx;
340 
341  // fake specs based on the first non-header row
342  bool fakeHeaderNames;
343 
     // the eol marker, if any
345  *string eol;
346 
347  // data source iterator
348  AbstractLineIterator lineIterator;
349 
350 public:
351 
353 
     // creates the AbstractCsvIterator with an option hash in single-type mode
360  constructor(AbstractLineIterator li, *hash<auto> opts);
361 
362 
364 
     // NOTE(review): variant taking a record spec in addition to the options; presumably the
     // multi-type counterpart of the single-type constructor -- confirm against the module docs
369  // NOTE: when declared as *hash then always calls this constructor
370  constructor(AbstractLineIterator li, hash<auto> spec, hash<auto> opts);
371 
372 
374 protected:
     // process common options and assign internal fields
375  processCommonOptions(*hash<auto> opts, int C_OPTx);
376 public:
377 
378 
380 protected:
     // process specification and assign internal data for resolving
381  processSpec(hash<auto> spec);
382 public:
383 
384 
386 protected:
     // match headers provided at csv header or in options; never called for multi-type
387  prepareFieldsFromHeaders(*list<auto> headers);
388 public:
389 
390 
     // NOTE(review): no description is visible in this listing; presumably reports whether the
     // iterator currently points at a valid record -- confirm
391  bool valid();
392 
393 
395 
     // Moves the current line / record position to the next line / record; returns False if
     // there are no more (description truncated in the generated docs; presumably lines / records)
400  bool next();
401 
402 
404 
     // Reads a single row without moving the index position.
406  peek();
407 
408 
410 
     // Returns the given column value for the current row.
417  auto memberGate(string name);
418 
419 
421 
     // Returns the current record as a hash.
432  hash<auto> getValue();
433 
434 
436 
     // Returns the current record as a hash.
     // NOTE(review): the "extended" flag presumably selects the extended hash form
     // (cf. the extendedRecord member) -- confirm
449  hash<auto> getRecord(bool extended);
450 
451 
453 
     // Returns the current record as a hash.
464  hash<auto> getRecord();
465 
466 
468 
     // Returns the current record as a list.
480  auto getRecordList();
481 
482 
484 
     // Returns the current separator string.
491  string getSeparator();
492 
493 
495 
     // Returns the current quote string.
502  string getQuote();
503 
504 
     // Returns the description of the record type, if any.
506  *hash<string, AbstractDataField> getRecordType();
507 
508 
510 
     // Returns the current record headers or NOTHING if no headers have been detected or saved yet
517  *list<string> getHeaders();
518 
519 
521 
     // NOTE(review): no description is visible in this listing; presumably returns the headers
     // for the given record type -- confirm
526  *list<string> getHeaders(string type);
527 
528 
530 
     // Returns the row index being iterated, which does not necessarily correspond to the line number
541  int index();
542 
543 
545 
     // Returns the current iterator line number in the file (the first line is line 1), or 0 if
     // not pointing at a line (description truncated in the generated docs -- confirm the exact condition)
558  int lineNumber();
559 
560 
562 
     // Returns the current line 'as it is', i.e. the original string.
571  string getRawLine();
572 
573 
575 
     // Returns the list of raw string values of the current line.
585  list<*string> getRawLineValues();
586 
587 
588 protected:
     // NOTE(review): no description is visible in this listing; presumably converts the raw field
     // value "val" according to the field description "fh" -- confirm
589  auto handleType(hash<auto> fh, *string val);
590 public:
591 
592 
594 protected:
     // Read line split by separator/quote into list.
595  list<*string> getLineAndSplit();
596 public:
597 
598 
600 
     // Identify a fixed-length line type using identifyTypeImpl(); may be overridden if necessary.
607  string identifyType(list<auto> rec);
608 
609 
611 
618 protected:
     // Identify an input record, given the raw line string.
619  *string identifyTypeImpl(list<auto> rec);
620 public:
621 
622 
624 protected:
     // Parses a line in the file and returns the processed fields.
     // NOTE(review): the generated description says "a processed list of the fields" while the
     // declared return type is hash<auto> -- confirm which is accurate
625  hash<auto> parseLine();
626 public:
627 
628  }; // AbstractCsvIterator class
629 }; // CsvUtil namespace
int index()
Returns the row index being iterated, which does not necessarily correspond to the line number when t...
the AbstractCsvIterator class is an abstract base class that allows abstract CSV data to be iterated ...
Definition: AbstractCsvIterator.qc.dox.h:285
processSpec(hash< auto > spec)
process specification and assign internal data for resolving
prepareFieldsFromHeaders(*list< auto > headers)
match headers provided at csv header or in options, never called for multi-type because header_names ...
const True
const Options
valid options for the object (a hash for quick lookups of valid keys)
Definition: AbstractCsvIterator.qc.dox.h:290
string getQuote()
Returns the current quote string.
*string identifyTypeImpl(list< auto > rec)
Identify an input record, given the raw line string. This method performs a lookup to a precalculated ...
*list< string > getHeaders()
Returns the current record headers or NOTHING if no headers have been detected or saved yet...
processCommonOptions(*hash< auto > opts, int C_OPTx)
process common options and assign internal fields
list< *string > getRawLineValues()
Returns the list of raw string values of the current line.
const False
hash< auto > getRecord()
Returns the current record as a hash.
string getSeparator()
Returns the current separator string.
string identifyType(list< auto > rec)
Identify a fixed-length line type using identifyTypeImpl(); may be overridden if necessary.
constructor(AbstractLineIterator li, *hash< auto > opts)
creates the AbstractCsvIterator with an option hash in single-type mode
hash< auto > getValue()
Returns the current record as a hash.
string getRawLine()
Returns the current line 'as it is', i.e. the original string.
*hash< string, AbstractDataField > getRecordType()
Returns the description of the record type, if any.
*string eol
the eol marker, if any
Definition: AbstractCsvIterator.qc.dox.h:345
string type(auto arg)
peek()
Reads a single row without moving the index position.
hash< auto > parseLine()
Parses a line in the file and returns a processed list of the fields.
auto memberGate(string name)
Returns the given column value for the current row.
the CsvUtil namespace. All classes used in the CsvUtil module should be inside this namespace ...
Definition: AbstractCsvIterator.qc.dox.h:28
list< *string > getLineAndSplit()
Read line split by separator/quote into list.
auto getRecordList()
Returns the current record as a list.
bool next()
Moves the current line / record position to the next line / record; returns False if there are no mor...
int lineNumber()
Returns the current iterator line number in the file (the first line is line 1) or 0 if not pointing ...