Add English CSV documentation and improve JavaDoc for CSVWriter

This commit is contained in:
wenshao 2025-08-22 23:57:33 +08:00
parent 77a4dd8218
commit d79f5139b7
3 changed files with 662 additions and 31 deletions

View File

@ -22,6 +22,38 @@ import java.util.function.IntFunction;
import static com.alibaba.fastjson2.util.DateUtils.DEFAULT_ZONE_ID;
/**
* An abstract base class for writing CSV (Comma-Separated Values) data.
*
* <p>This class provides methods for writing various data types to CSV format,
* handling proper escaping and formatting according to CSV standards.
*
* <p>Two implementations are provided:
* <ul>
* <li>{@link CSVWriterUTF8} - for byte-oriented output to an OutputStream (UTF-8 by default)</li>
* <li>{@link CSVWriterUTF16} - for UTF-16 encoded output and for character output to a Writer</li>
* </ul>
*
* <p>Example usage:
* <pre>
* // Create a CSV writer to a file
* try (CSVWriter writer = CSVWriter.of(new File("data.csv"))) {
* // Write a header row
* writer.writeLine("Name", "Age", "City");
*
* // Write data rows
* writer.writeLine("John Doe", 30, "New York");
* writer.writeLine("Jane Smith", 25, "Los Angeles");
* }
*
* // Create a CSV writer to a string
* try (CSVWriter writer = CSVWriter.of(new StringWriter())) {
* writer.writeLine("ID", "Value");
* writer.writeLine(123, "Some value");
* String csvContent = writer.toString();
* }
* </pre>
*/
public abstract class CSVWriter
implements Closeable, Flushable {
private long features;
@ -30,6 +62,12 @@ public abstract class CSVWriter
int off;
/**
* Constructs a CSVWriter with the specified zone ID and features.
*
* @param zoneId the time zone ID to use for date/time formatting
* @param features optional features to enable for this writer
*/
CSVWriter(ZoneId zoneId, Feature... features) {
for (Feature feature : features) {
this.features |= feature.mask;
@ -38,18 +76,47 @@ public abstract class CSVWriter
this.zoneId = zoneId;
}
/**
 * Creates a CSVWriter whose output is collected in a newly allocated,
 * in-memory {@link ByteArrayOutputStream}.
 *
 * @return a new CSVWriter instance backed by an in-memory buffer
 */
public static CSVWriter of() {
    ByteArrayOutputStream memory = new ByteArrayOutputStream();
    return of(memory);
}
/**
 * Opens the given file for writing and returns a CSVWriter that encodes
 * its output as UTF-8.
 *
 * @param file the destination file
 * @return a new CSVWriter instance writing to {@code file}
 * @throws FileNotFoundException if the file cannot be opened for writing
 */
public static CSVWriter of(File file) throws FileNotFoundException {
    FileOutputStream target = new FileOutputStream(file);
    return of(target, StandardCharsets.UTF_8);
}
/**
 * Opens the given file for writing and returns a CSVWriter that encodes
 * its output with the supplied charset.
 *
 * @param file the destination file
 * @param charset the charset used to encode the output
 * @return a new CSVWriter instance writing to {@code file}
 * @throws FileNotFoundException if the file cannot be opened for writing
 */
public static CSVWriter of(File file, Charset charset) throws FileNotFoundException {
    FileOutputStream target = new FileOutputStream(file);
    return of(target, charset);
}
/**
* Writes an object as a CSV line by extracting its fields.
*
* <p>If the object is null, an empty line is written.
* If the object has multiple fields, each field value is written as a separate column.
* If the object has a single value field, that value is written as a single column.
*
* @param object the object to write as a CSV line
*/
public final void writeLineObject(Object object) {
if (object == null) {
this.writeLine();
@ -77,6 +144,13 @@ public abstract class CSVWriter
}
}
/**
* Writes a Date value to the CSV.
*
* <p>If the date is null, nothing is written.
*
* @param date the date to write
*/
public final void writeDate(Date date) {
if (date == null) {
return;
@ -85,6 +159,13 @@ public abstract class CSVWriter
writeDate(millis);
}
/**
* Writes an Instant value to the CSV.
*
* <p>If the instant is null, nothing is written.
*
* @param instant the instant to write
*/
public final void writeInstant(Instant instant) {
if (instant == null) {
return;
@ -105,6 +186,13 @@ public abstract class CSVWriter
writeLocalDateTime(ldt);
}
/**
* Writes a LocalDate value to the CSV in ISO format (yyyy-MM-dd).
*
* <p>If the date is null, nothing is written.
*
* @param date the local date to write
*/
public void writeLocalDate(LocalDate date) {
if (date == null) {
return;
@ -113,10 +201,23 @@ public abstract class CSVWriter
writeRaw(str);
}
/**
 * Writes a LocalDateTime value to the CSV.
 *
 * <p>The format used is "yyyy-MM-dd HH:mm:ss" with optional fractional seconds.
 *
 * <p>Despite its name, the {@code instant} parameter is a {@code LocalDateTime},
 * not an {@code Instant}.
 * NOTE(review): null handling is left to the implementing subclass - confirm.
 *
 * @param instant the local date-time to write
 */
public abstract void writeLocalDateTime(LocalDateTime instant);
// protected abstract void writeDirect(byte[] bytes, int off, int len);
/**
* Writes a CSV line with the specified number of columns, using a function to provide values.
*
* @param columnCount the number of columns to write
* @param function a function that provides the value for each column index
*/
public final void writeLine(int columnCount, IntFunction function) {
for (int i = 0; i < columnCount; i++) {
Object value = function.apply(i);
@ -131,6 +232,11 @@ public abstract class CSVWriter
writeLine();
}
/**
* Writes a CSV line with the specified list of values.
*
* @param values the list of values to write as columns
*/
public final void writeLine(List values) {
for (int i = 0; i < values.size(); i++) {
if (i != 0) {
@ -143,6 +249,11 @@ public abstract class CSVWriter
writeLine();
}
/**
* Writes a CSV line with the specified values.
*
* @param values the values to write as columns
*/
public final void writeLine(Object... values) {
for (int i = 0; i < values.length; i++) {
if (i != 0) {
@ -155,12 +266,28 @@ public abstract class CSVWriter
writeLine();
}
/**
 * Writes a single comma (column separator) to the CSV.
 *
 * <p>Useful when a row is written field by field, for example
 * {@code writeInt32(1); writeComma(); writeString("a"); writeLine();}.
 */
public abstract void writeComma();
/**
 * Writes a single quote character to the CSV.
 *
 * <p>NOTE(review): presumably used by implementations when emitting quoted
 * string values - confirm in the subclasses.
 */
protected abstract void writeQuote();
/**
 * Writes a line terminator to the CSV, ending the current row.
 *
 * <p>The {@code writeLine(...)} overloads that accept values call this method
 * after the last column has been written.
 */
public abstract void writeLine();
/**
* Writes a value to the CSV, automatically determining the appropriate method based on the value's type.
*
* <p>If the value is null, nothing is written.
*
* @param value the value to write
*/
public void writeValue(Object value) {
if (value == null) {
return;
@ -210,6 +337,13 @@ public abstract class CSVWriter
}
}
/**
* Writes a BigInteger value to the CSV.
*
* <p>If the value is null, nothing is written.
*
* @param value the BigInteger value to write
*/
public void writeBigInteger(BigInteger value) {
if (value == null) {
return;
@ -219,10 +353,25 @@ public abstract class CSVWriter
writeRaw(str);
}
/**
 * Writes a boolean value to the CSV.
 *
 * <p>Only the value is written; column separators and line terminators are
 * written separately (see {@code writeComma()} and {@code writeLine()}).
 *
 * @param booleanValue the boolean value to write
 */
public abstract void writeBoolean(boolean booleanValue);
/**
 * Writes a long integer value to the CSV.
 *
 * <p>Only the value is written; column separators and line terminators are
 * written separately (see {@code writeComma()} and {@code writeLine()}).
 *
 * @param longValue the long value to write
 */
public abstract void writeInt64(long longValue);
/**
* Writes a date represented as milliseconds since the epoch to the CSV.
*
* @param millis the milliseconds since the epoch
*/
public final void writeDate(long millis) {
ZoneId zoneId = this.zoneId;
@ -314,8 +463,25 @@ public abstract class CSVWriter
writeRaw(str);
}
/**
 * Writes a date in {@code yyyy-MM-dd} format to the CSV.
 *
 * <p>NOTE(review): the "10" suffix presumably refers to the 10-character
 * width of the formatted date - confirm.
 *
 * @param year the year
 * @param month the month (1-12)
 * @param dayOfMonth the day of month (1-31)
 */
public abstract void writeDateYYYMMDD10(int year, int month, int dayOfMonth);
/**
* Writes a date and time in {@code yyyy-MM-dd HH:mm:ss} format to the CSV.
*
* @param year the year
* @param month the month (1-12)
* @param dayOfMonth the day of month (1-31)
* @param hour the hour (0-23)
* @param minute the minute (0-59)
* @param second the second (0-59)
*/
public abstract void writeDateTime19(
int year,
int month,
@ -324,35 +490,108 @@ public abstract class CSVWriter
int minute,
int second);
/**
 * Writes a string value to the CSV.
 *
 * <p>The string will be properly escaped if necessary according to CSV standards.
 * See {@code Feature.AlwaysQuoteStrings} for quoting string values even when
 * they contain no special characters.
 *
 * <p>NOTE(review): null handling is left to the implementing subclass - confirm.
 *
 * @param value the string value to write
 */
public abstract void writeString(String value);
/**
 * Writes an integer value to the CSV.
 *
 * <p>Only the value is written; column separators and line terminators are
 * written separately (see {@code writeComma()} and {@code writeLine()}).
 *
 * @param intValue the integer value to write
 */
public abstract void writeInt32(int intValue);
/**
 * Writes a double value to the CSV.
 *
 * <p>Only the value is written; column separators and line terminators are
 * written separately (see {@code writeComma()} and {@code writeLine()}).
 * NOTE(review): the textual form of NaN and infinite values is defined by the
 * implementing subclass - confirm.
 *
 * @param value the double value to write
 */
public abstract void writeDouble(double value);
/**
 * Writes a float value to the CSV.
 *
 * <p>Only the value is written; column separators and line terminators are
 * written separately (see {@code writeComma()} and {@code writeLine()}).
 * NOTE(review): the textual form of NaN and infinite values is defined by the
 * implementing subclass - confirm.
 *
 * @param value the float value to write
 */
public abstract void writeFloat(float value);
/**
 * Flushes any buffered data to the underlying output (an OutputStream or a
 * Writer, depending on how this CSVWriter was created).
 *
 * <p>Unlike {@code Flushable.flush()}, this declaration does not throw a
 * checked {@code IOException}.
 */
public abstract void flush();
/**
 * Writes a byte array as a string value to the CSV.
 *
 * <p>NOTE(review): whether the same escaping/quoting as
 * {@code writeString(String)} is applied is not visible here - confirm in the
 * subclasses.
 *
 * @param utf8 the UTF-8 encoded byte array to write
 */
public abstract void writeString(byte[] utf8);
/**
 * Writes a BigDecimal value to the CSV.
 *
 * <p>NOTE(review): null handling and the choice between plain and scientific
 * notation are left to the implementing subclass - confirm.
 *
 * @param value the BigDecimal value to write
 */
public abstract void writeDecimal(BigDecimal value);
/**
 * Writes a decimal value represented by an unscaled value and scale to the CSV.
 *
 * <p>NOTE(review): presumably the pair has the same meaning as
 * {@code BigDecimal.valueOf(unscaledVal, scale)} - confirm.
 *
 * @param unscaledVal the unscaled value
 * @param scale the scale (number of decimal places)
 */
public abstract void writeDecimal(long unscaledVal, int scale);
/**
 * Writes a raw string to the CSV without any escaping.
 *
 * <p>Used by this class to emit text that has already been formatted, such as
 * the string forms produced in {@code writeLocalDate} and {@code writeBigInteger}.
 * Callers must ensure the text needs no quoting.
 *
 * @param str the string to write
 */
protected abstract void writeRaw(String str);
/**
 * Closes the CSV writer and releases any resources associated with it.
 *
 * <p>Because this class implements {@code Closeable}, instances can be managed
 * with a try-with-resources statement.
 *
 * @throws IOException if an I/O error occurs
 */
@Override
public abstract void close() throws IOException;
/**
 * Returns a CSVWriter that encodes its output as UTF-8 and writes it to the
 * given OutputStream, using the default zone ID.
 *
 * @param out the destination stream
 * @param features optional features to enable for this writer
 * @return a new CSVWriter instance
 */
public static CSVWriter of(OutputStream out, Feature... features) {
    final Charset utf8 = StandardCharsets.UTF_8;
    return new CSVWriterUTF8(out, utf8, DEFAULT_ZONE_ID, features);
}
/**
 * Returns a CSVWriter that writes to the given OutputStream with the supplied
 * charset, using the default zone ID.
 *
 * @param out the destination stream
 * @param charset the charset used to encode the output
 * @return a new CSVWriter instance
 */
public static CSVWriter of(OutputStream out, Charset charset) {
    final ZoneId zone = DEFAULT_ZONE_ID;
    return of(out, charset, zone);
}
/**
* Creates a new CSVWriter that writes to the specified OutputStream using the specified charset and zone ID.
*
* @param out the OutputStream to write to
* @param charset the charset to use for encoding
* @param zoneId the time zone ID to use for date/time formatting
* @return a new CSVWriter instance
*/
public static CSVWriter of(OutputStream out, Charset charset, ZoneId zoneId) {
if (charset == StandardCharsets.UTF_16
|| charset == StandardCharsets.UTF_16LE
@ -368,15 +607,34 @@ public abstract class CSVWriter
return new CSVWriterUTF8(out, charset, zoneId);
}
/**
 * Returns a CSVWriter that writes characters to the given Writer, using the
 * default zone ID.
 *
 * @param out the destination Writer
 * @return a new CSVWriter instance
 */
public static CSVWriter of(Writer out) {
    CSVWriter csvWriter = new CSVWriterUTF16(out, DEFAULT_ZONE_ID);
    return csvWriter;
}
/**
 * Returns a CSVWriter that writes characters to the given Writer, using the
 * supplied zone ID.
 *
 * @param out the destination Writer
 * @param zoneId the time zone ID to use for date/time formatting
 * @return a new CSVWriter instance
 */
public static CSVWriter of(Writer out, ZoneId zoneId) {
    CSVWriter csvWriter = new CSVWriterUTF16(out, zoneId);
    return csvWriter;
}
/**
* Configuration features for CSV writing.
*/
public enum Feature {
/**
* Always quote string values, even if they don't contain special characters.
*/
AlwaysQuoteStrings(1);
public final long mask;

View File

@ -1,15 +1,18 @@
CSV (comma-separated values)是一种使用逗号作为分隔符的被广泛使用的数据交换文件格式。
# fastjson2 CSV 支持
fastjson提供了一个高性能CSV读写支持。
CSV (Comma-Separated Values) 是一种使用逗号作为分隔符的广泛使用的数据交换文件格式。fastjson2 提供了高性能CSV 读写支持。
# 1. 识别行数
分析CSV格式的文件有多少行如果CSV文件有Header具体数据行数要减1
## 1. 读取 CSV 文件
### 1.1 识别行数
统计 CSV 格式文件的行数。如果 CSV 文件有 Header，实际数据行数需要减 1：
```java
File file = ...;
int rowCount = CSVReader.rowCount(file);
```
# 2. 分析文件内容
### 1.2 分析文件内容
对 CSV 文件进行统计分析,获取各列的统计信息:
```java
File file = ...;
CSVReader parser = CSVReader.of(file);
@ -29,7 +32,7 @@ sql.append("CREATE TABLE ").append(tableName).append(" (\n");
for (int i = 0; i < columns.size(); i++) {
StreamReader.ColumnStat columnStat = columns.get(i);
sql.append('\t')
.append(columnName)
.append(columnName) // 注意:这里应该是实际的列名
.append(' ')
.append(columnStat.getInferSQLType());
@ -41,7 +44,8 @@ for (int i = 0; i < columns.size(); i++) {
sql.append(");");
```
# 3. 缺省按照String类型来读取文件
### 1.3 缺省按照 String 类型读取文件
默认情况下,所有列都按 String 类型读取:
```java
File file = ...;
CSVReader parser = CSVReader.of(file);
@ -54,17 +58,19 @@ while (true) {
if (line == null) {
break;
}
// handler line
// 处理每一行数据
// line[0], line[1], ... 分别对应每一列的数据
}
```
# 4. 指定每列的数据类型来读取文件
### 1.4 指定每列的数据类型读取文件
可以为每列指定具体的数据类型:
```java
File file = ...;
Type[] types = new Type[] {
Integer.class,
Long.class,
String.class ,
String.class,
Date.class
};
// 构造CSVReader传入各列的类型信息
@ -80,66 +86,173 @@ while (true) {
}
// 处理数据每列的值都会和构造CSVReader时传入的types对应
Integer v0 = line[0];
Long v1 = line[1];
String v2 = line[2];
Date v3 = line[3];
Integer v0 = (Integer) line[0];
Long v1 = (Long) line[1];
String v2 = (String) line[2];
Date v3 = (Date) line[3];
}
```
# 5. 将每行数据读取成一个JavaBean
### 1.5 将每行数据读取成 JavaBean
可以将每行数据直接映射到 JavaBean 对象:
```java
@Data
class Bean {
long id;
int age;
String name;
Date created;
public class Person {
private long id;
private int age;
private String name;
private Date created;
}
File file = ...;
// 构造CSVReader传入对象类型
SVReader parser = CSVReader.of(file, Bean.clss);
CSVReader<Person> parser = CSVReader.of(file, Person.class);
// 根据需要先读取Header第一行如果没有Header可以忽略
parser.readHeader();
while (true) {
Bean object = (Bean) parser.readLineObject();
if (object == null) {
Person person = parser.readLineObject();
if (person == null) {
break;
}
// 处理数据 ...
// 处理数据
System.out.println("ID: " + person.getId() + ", Name: " + person.getName());
}
```
## 5.1 使用Lambda Consumer来读取JavaBean
### 1.6 使用 Lambda Consumer 读取 JavaBean
使用函数式编程方式处理所有数据:
```java
File file = ...;
// 构造CSVReader传入对象类型
SVReader parser = CSVReader.of(file, Bean.clss);
CSVReader<Person> parser = CSVReader.of(file, Person.class);
// 根据需要是否要读取Header第一行
boolean readHeader = true;
parser.readLineObjectAll(
readHeader,
e -> {
处理数据 ...
person -> {
// 处理每个Person对象
System.out.println("ID: " + person.getId() + ", Name: " + person.getName());
}
);
```
# 6. 写入CSV格式文件
### 1.7 一次性读取所有行
可以一次性读取所有行数据:
```java
File file = ...;
CSVReader parser = CSVReader.of(file);
// 读取Header可选
parser.readHeader();
// 一次性读取所有行
List<String[]> allLines = parser.readLineAll();
// 处理所有行
for (String[] line : allLines) {
// 处理每一行
}
```
## 2. 写入 CSV 文件
### 2.1 基本写入操作
```java
File file = ...;
CSVWriter writer = CSVWriter.of(file, StandardCharsets.UTF_8);
// 写入Header
writer.writeLine("ID", "Name", "Age", "Created");
// 写入数据行
writer.writeLine(1001, "Alice", 25, new Date());
writer.writeLine(1002, "Bob", 30, new Date());
// 关闭writer
writer.close();
```
### 2.2 使用对象写入
```java
// 定义数据类
@Data
@AllArgsConstructor
public class Person {
private long id;
private String name;
private int age;
private Date created;
}
// 创建数据
List<Person> persons = Arrays.asList(
new Person(1001, "Alice", 25, new Date()),
new Person(1002, "Bob", 30, new Date())
);
// 写入CSV文件
File file = ...;
CSVWriter writer = CSVWriter.of(file, StandardCharsets.UTF_8);
// 写入Header
writer.writeLine("ID", "Name", "Age", "Created");
// 写入数据
Object[] row = ...;
writer.writeLine(row);
for (Person person : persons) {
writer.writeLineObject(person);
}
writer.close();
```
### 2.3 使用 StringWriter 写入字符串
```java
StringWriter stringWriter = new StringWriter();
CSVWriter writer = CSVWriter.of(stringWriter);
// 写入数据
writer.writeLine("Name", "Age");
writer.writeLine("Alice", 25);
writer.writeLine("Bob", 30);
// 先关闭 writer，确保缓冲区中的数据已写入 StringWriter
writer.close();
// 获取CSV内容
String csvContent = stringWriter.toString();
```
### 2.4 高级写入操作
```java
File file = ...;
CSVWriter writer = CSVWriter.of(file, StandardCharsets.UTF_8);
// 写入不同类型的数据
writer.writeInt32(123); // 写入整数
writer.writeComma(); // 写入逗号分隔符
writer.writeString("text"); // 写入字符串
writer.writeComma();
writer.writeDouble(3.14); // 写入双精度浮点数
writer.writeComma();
writer.writeDate(new Date()); // 写入日期
writer.writeLine(); // 写入换行符
writer.close();
```
## 3. 性能优化建议
1. **使用适当的缓冲区大小**：CSVReader 和 CSVWriter 内部使用缓冲区来提高性能。
2. **批量操作**：尽可能使用 `writeLine(Object...)` 或 `writeLine(List)`，而不是逐个写入字段。
3. **及时关闭资源**:使用 try-with-resources 语句确保 CSVReader/CSVWriter 被正确关闭。
4. **类型预定义**:在读取时预定义列类型可以避免运行时类型转换开销。
## 4. 注意事项
1. **字符编码**:确保读写时使用正确的字符编码,推荐使用 UTF-8。
2. **特殊字符处理**：CSVWriter 会自动处理包含逗号、引号和换行符的字符串。
3. **内存使用**:对于大文件,建议使用流式处理而不是一次性加载所有数据。
4. **异常处理**:适当处理可能的 IOException 和数据解析异常。

260
docs/csv_en.md Normal file
View File

@ -0,0 +1,260 @@
# fastjson2 CSV Support
CSV (Comma-Separated Values) is a widely used data exchange file format that uses commas as separators. fastjson2 provides high-performance CSV reading and writing support.
## 1. Reading CSV Files
### 1.1 Counting Rows
Count the number of rows in a CSV file. If the file has a header row, subtract 1 to get the number of data rows:
```java
File file = ...;
int rowCount = CSVReader.rowCount(file);
```
### 1.2 Analyzing File Content
Perform statistical analysis on the CSV file to obtain column statistics:
```java
File file = ...;
CSVReader parser = CSVReader.of(file);
// Read the first header row first
parser.readHeader();
// Perform statistical analysis on the data
parser.statAll();
// Get analysis results for each column
List<StreamReader.ColumnStat> columns = parser.getColumnStats();
// Generate table creation statements based on column statistics
StringBuilder sql = new StringBuilder();
sql.append("CREATE TABLE ").append(tableName).append(" (\n");
for (int i = 0; i < columns.size(); i++) {
StreamReader.ColumnStat columnStat = columns.get(i);
sql.append('\t')
.append(columnName) // Note: columnName is a placeholder; use the actual name of column i
.append(' ')
.append(columnStat.getInferSQLType());
if (i != columns.size() - 1) {
sql.append(',');
}
sql.append("\n");
}
sql.append(");");
```
### 1.3 Reading Files with Default String Type
By default, all columns are read as String type:
```java
File file = ...;
CSVReader parser = CSVReader.of(file);
// Read the first header row if needed, can be omitted if there's no header
parser.readHeader();
while (true) {
String[] line = parser.readLine();
if (line == null) {
break;
}
// Process each row of data
// line[0], line[1], ... correspond to each column's data respectively
}
```
### 1.4 Reading Files with Specified Column Data Types
You can specify specific data types for each column:
```java
File file = ...;
Type[] types = new Type[] {
Integer.class,
Long.class,
String.class,
Date.class
};
// Construct CSVReader with column type information
CSVReader parser = CSVReader.of(file, types);
// Read the first header row if needed, can be omitted if there's no header
parser.readHeader();
while (true) {
Object[] line = parser.readLineValues();
if (line == null) {
break;
}
// Process data, each column's value corresponds to the types specified when constructing CSVReader
Integer v0 = (Integer) line[0];
Long v1 = (Long) line[1];
String v2 = (String) line[2];
Date v3 = (Date) line[3];
}
```
### 1.5 Reading Each Row as a JavaBean
You can directly map each row of data to a JavaBean object:
```java
@Data
public class Person {
private long id;
private int age;
private String name;
private Date created;
}
File file = ...;
// Construct CSVReader with object type
CSVReader<Person> parser = CSVReader.of(file, Person.class);
// Read the first header row if needed, can be omitted if there's no header
parser.readHeader();
while (true) {
Person person = parser.readLineObject();
if (person == null) {
break;
}
// Process data
System.out.println("ID: " + person.getId() + ", Name: " + person.getName());
}
```
### 1.6 Using Lambda Consumer to Read JavaBeans
Use functional programming approach to process all data:
```java
File file = ...;
// Construct CSVReader with object type
CSVReader<Person> parser = CSVReader.of(file, Person.class);
// Set to true if the first row is a header that should be read before the data rows
boolean readHeader = true;
parser.readLineObjectAll(
readHeader,
person -> {
// Process each Person object
System.out.println("ID: " + person.getId() + ", Name: " + person.getName());
}
);
```
### 1.7 Reading All Rows at Once
You can read all row data at once:
```java
File file = ...;
CSVReader parser = CSVReader.of(file);
// Read header (optional)
parser.readHeader();
// Read all rows at once
List<String[]> allLines = parser.readLineAll();
// Process all rows
for (String[] line : allLines) {
// Process each row
}
```
## 2. Writing CSV Files
### 2.1 Basic Writing Operations
```java
File file = ...;
CSVWriter writer = CSVWriter.of(file, StandardCharsets.UTF_8);
// Write header
writer.writeLine("ID", "Name", "Age", "Created");
// Write data rows
writer.writeLine(1001, "Alice", 25, new Date());
writer.writeLine(1002, "Bob", 30, new Date());
// Close writer
writer.close();
```
### 2.2 Writing with Objects
```java
// Define data class
@Data
@AllArgsConstructor
public class Person {
private long id;
private String name;
private int age;
private Date created;
}
// Create data
List<Person> persons = Arrays.asList(
new Person(1001, "Alice", 25, new Date()),
new Person(1002, "Bob", 30, new Date())
);
// Write CSV file
File file = ...;
CSVWriter writer = CSVWriter.of(file, StandardCharsets.UTF_8);
// Write header
writer.writeLine("ID", "Name", "Age", "Created");
// Write data
for (Person person : persons) {
writer.writeLineObject(person);
}
writer.close();
```
### 2.3 Writing to String with StringWriter
```java
StringWriter stringWriter = new StringWriter();
CSVWriter writer = CSVWriter.of(stringWriter);
// Write data
writer.writeLine("Name", "Age");
writer.writeLine("Alice", 25);
writer.writeLine("Bob", 30);
// Close the writer first so that any buffered data is flushed to the StringWriter
writer.close();
// Get CSV content
String csvContent = stringWriter.toString();
```
### 2.4 Advanced Writing Operations
```java
File file = ...;
CSVWriter writer = CSVWriter.of(file, StandardCharsets.UTF_8);
// Write different types of data
writer.writeInt32(123); // Write integer
writer.writeComma(); // Write comma separator
writer.writeString("text"); // Write string
writer.writeComma();
writer.writeDouble(3.14); // Write double precision floating point
writer.writeComma();
writer.writeDate(new Date()); // Write date
writer.writeLine(); // Write line terminator
writer.close();
```
## 3. Performance Optimization Recommendations
1. **Use appropriate buffer sizes**: CSVReader and CSVWriter internally use buffers to improve performance.
2. **Batch operations**: Use `writeLine(Object...)` or `writeLine(List)` instead of writing fields individually whenever possible.
3. **Close resources promptly**: Use try-with-resources statements to ensure CSVReader/CSVWriter are properly closed.
4. **Predefine types**: Predefining column types during reading can avoid runtime type conversion overhead.
## 4. Important Notes
1. **Character encoding**: Ensure correct character encoding is used during reading and writing. UTF-8 is recommended.
2. **Special character handling**: CSVWriter automatically handles strings containing commas, quotes, and line breaks.
3. **Memory usage**: For large files, it's recommended to use streaming processing instead of loading all data at once.
4. **Exception handling**: Properly handle possible IOException and data parsing exceptions.