package com.youbenzi.md2.markdown;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map.Entry;
import java.util.regex.Pattern;
import com.youbenzi.md2.markdown.builder.CodeBuilder;
import com.youbenzi.md2.markdown.builder.CommonTextBuilder;
import com.youbenzi.md2.markdown.builder.HeaderBuilder;
import com.youbenzi.md2.markdown.builder.OrderedListBuilder;
import com.youbenzi.md2.markdown.builder.QuoteBuilder;
import com.youbenzi.md2.markdown.builder.UnorderedListBuilder;
public class MDAnalyzer {
// Fixed-size list (Arrays.asList) of the MDToken constants BOLD_WORD, ITALIC_WORD, ITALIC_WORD_2,
// STRIKE_WORD, CODE_WORD, IMG and LINK.
// NOTE(review): the name suggests these are the tokens that may appear inside a single line of text;
// no use of this list is visible in this part of the file -- confirm against the rest of the class.
private static List<String> mdTokenInLine = Arrays.asList(MDToken.BOLD_WORD, MDToken.ITALIC_WORD, MDToken.ITALIC_WORD_2, MDToken.STRIKE_WORD, MDToken.CODE_WORD, MDToken.IMG, MDToken.LINK);
/**
 * Parses markdown content into an ordered list of blocks.
 * <p>
 * The input is first split by {@code tableFilter} into table and non-table segments.
 * A table segment becomes a single block of type {@code BlockType.TABLE} carrying the
 * segment's table data; a non-table segment is handed to {@code analyzeTextNoTable}
 * and all blocks it yields are appended in order.
 *
 * @param reader source of the markdown content
 * @return the blocks of the document, in the order the segments were produced
 */
public static List<Block> analyze(BufferedReader reader){
    List<Block> result = new ArrayList<Block>();
    for (TextOrTable segment : tableFilter(reader)) {
        if (!segment.isTable()) {
            // Plain text segment: delegate to the table-free parser and keep its output order.
            result.addAll(analyzeTextNoTable(segment.getReader()));
            continue;
        }
        // Table segment: wrap the already extracted cell data in one TABLE block.
        Block tableBlock = new Block();
        tableBlock.setType(BlockType.TABLE);
        tableBlock.setTableData(segment.getTableData());
        result.add(tableBlock);
    }
    return result;
}
/**
 * Parses markdown text that contains no table syntax.
 * <p>
 * Every line is read from {@code reader} up front, then the lines are scanned from top
 * to bottom. Blank lines are skipped. Each remaining line is tested, in this order, for:
 * a fenced code block opened by {@code MDToken.CODE}, a code line prefixed with
 * {@code MDToken.CODE_BLANK}, a header starting with {@code MDToken.HEADLINE}, a quote,
 * an unordered list, an ordered list. Anything else becomes a header when
 * {@code HeaderBuilder.isRightType} reports a positive level for the following line
 * (that following line is then consumed), or a plain text block otherwise.
 * <p>
 * The reader is closed before returning. An {@link IOException} is not propagated:
 * its stack trace is printed and the blocks collected up to that point are returned.
 *
 * @param reader source of the text; closed by this method
 * @return the parsed blocks in document order
 */
private static List<Block> analyzeTextNoTable(BufferedReader reader){
    List<Block> blocks = new ArrayList<Block>();
    try {
        // Load every line first: several block types need to look ahead.
        List<String> lines = new ArrayList<String>();
        for (String line = reader.readLine(); line != null; line = reader.readLine()) {
            lines.add(line);
        }
        final int total = lines.size();
        // Stays true until an opening fence without a closing fence is met; after that
        // no later fence can be closed either, so the fence check is switched off.
        boolean fenceAllowed = true;
        for (int cur = 0; cur < total; cur++) {
            final String current = lines.get(cur);
            final String trimmed = current.trim();
            if (trimmed.length() == 0) {
                // Blank lines carry no content.
                continue;
            }
            Block parsed = null;
            if (trimmed.startsWith(MDToken.CODE) && fenceAllowed) {
                StringBuilder code = new StringBuilder();
                int closingAt = -1;
                for (int probe = cur + 1; probe < total; probe++) {
                    String candidate = lines.get(probe);
                    if (candidate.trim().equals(MDToken.CODE)) {
                        // Found the closing fence.
                        closingAt = probe;
                        break;
                    }
                    code.append(candidate).append("\n");
                }
                if (closingAt < 0) {
                    // No closing fence: disable the fence check and step the cursor back
                    // so the same line is re-examined by the remaining branches.
                    fenceAllowed = false;
                    cur--;
                    continue;
                }
                cur = closingAt;
                parsed = new CodeBuilder(code.toString()).bulid();
            } else if (current.startsWith(MDToken.CODE_BLANK)) {
                Object[] outcome = analyzerList(cur, lines, new ListBuilderCon() {
                    @Override
                    public Block newBuilder(String text) {
                        return new CodeBuilder(text).bulid();
                    }
                    @Override
                    public boolean isRightType(String candidate) {
                        return candidate.startsWith(MDToken.CODE_BLANK);
                    }
                    @Override
                    public StringBuilder how2AppendIfBlank(StringBuilder target) {
                        return target.append("\n");
                    }
                    public StringBuilder how2AppendIfNotBlank(StringBuilder target, String value) {
                        // Drop the CODE_BLANK prefix before storing the code line.
                        return target.append(value.substring(MDToken.CODE_BLANK.length())+"\n");
                    }
                });
                cur = (Integer) outcome[0];
                parsed = (Block) outcome[1];
            } else if (trimmed.startsWith(MDToken.HEADLINE)) {
                parsed = new HeaderBuilder(current).bulid();
            } else if (isQuote(current)) {
                Object[] outcome = analyzerList(cur, lines, new ListBuilderCon() {
                    @Override
                    public Block newBuilder(String text) {
                        return new QuoteBuilder(text).bulid();
                    }
                    @Override
                    public boolean isRightType(String candidate) {
                        return isQuote(candidate);
                    }
                });
                cur = (Integer) outcome[0];
                parsed = (Block) outcome[1];
            } else if (isUnOrderedList(current)) {
                Object[] outcome = analyzerList(cur, lines, new ListBuilderCon() {
                    @Override
                    public Block newBuilder(String text) {
                        return new UnorderedListBuilder(text).bulid();
                    }
                    @Override
                    public boolean isRightType(String candidate) {
                        return isUnOrderedList(candidate);
                    }
                });
                cur = (Integer) outcome[0];
                parsed = (Block) outcome[1];
            } else if (isOrderedList(current)) {
                Object[] outcome = analyzerList(cur, lines, new ListBuilderCon() {
                    @Override
                    public Block newBuilder(String text) {
                        return new OrderedListBuilder(text).bulid();
                    }
                    @Override
                    public boolean isRightType(String candidate) {
                        return isOrderedList(candidate);
                    }
                });
                cur = (Integer) outcome[0];
                parsed = (Block) outcome[1];
            } else {
                int next = cur + 1;
                if (next < total) {
                    // The following line may mark the current one as a header.
                    int level = HeaderBuilder.isRightType(lines.get(next));
                    if (level > 0) {
                        parsed = new HeaderBuilder(current).bulid(level);
                        cur = next; // the marker line is consumed as well
                    }
                }
                if (parsed == null) {
                    parsed = new CommonTextBuilder(current).bulid();
                }
            }
            if (parsed != null) {
                blocks.add(parsed);
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    return blocks;
}
/**
* 筛选表格
* @param reader 缓存的数据
* @return 按文本顺序排列的文本或者表格的列表
*/
public static List<TextOrTable> tableFilter(BufferedReader reader){
List<TextOrTable> list = new ArrayList<TextOrTable>();
List<String> lines = new ArrayList<String>();
try {
String tmp = reader.readLine();
while (tmp != null) { //将内容每一行都存入list中
lines.add(tmp);
tmp = reader.readLine();
}
// boolean inCode = false;
StringBuffer sb = new StringBuffer();
for (int i = 0, l = lines.size(); i < l; i++) {
String str = lines.get(i);
boolean hasTable = false;
if(str.indexOf("|")>-1 ){ //检查是否有table的分隔符
hasTable = true;
boolean isStart = false;
boolean isEnd = false;
if(str.startsWith("\\|")){ //去头
str = str.substring(1);
isStart = true;
}
if(str.endsWith("\\|")){ //去尾
str = str.substring(0, str.length()-1);
isEnd = true;
}
String[] parts = str.split("\\|");
if(parts.length<=1 && !(isStart&&isEnd)){
hasTable = false;
}
}
if(hasTable){
if((i+1)<l){ //检查到符合规范的table头之后,检测下一行是否为 ---|---的类似字符串
String nextLine = lines.get(i+1);
String[] nextParts = nextLine.split("\\|");
for (String part : nextParts) {
part = part.trim().replaceAll("-", "");
if(part.length()>0){
hasTable = false;
}
if(!hasTable){
break;
}
}
}else{
hasTable = false;
}
}
if(hasTable){ //检查到真的有table存在
if(!sb.toString().equals("")){ //把已存入stringbuffer的内容先归档
TextOrTable text = new TextOrTable(false);
text.setReader(new BufferedReader(new StringReader(sb.toString())));
list.add(text);
sb = new StringBuffer(""); //将stringbuffer重新置为空
}
List<List<String>> tableDataList = new ArrayList<List<String>>();
int tableLineNum = i+1; //---|---的行数,此行不能放入table的data
for (int j = i; j < l; j++){
if(j==tableLineNum){
continue;
}
String tableLine = lines.get(j);
String[] cellDatas = tableLine.split("\\|");
if(cellDatas.length>=2){ //此行是table的数据
tableDataList.add(Arrays.asList(cellDatas));
if(j==(l-1)){ //到内容底部,table数据结束,归档
tableDataList = trimTableData(tableDataList);
TextOrTable table = new TextOrTable(true);
table.setTableData(tableDataList);
list.add(table);
i = j; //设置游标,跳出循环
break;
}
}else{ //table数据结束,归档
tableDataL
没有合适的资源?快使用搜索试试~ 我知道了~
能将markdown语法的文档内容,导出为word,pdf,HTML等的文件
共42个文件
java:32个
xml:1个
gradle:1个
3 下载量 131 浏览量
2023-06-03
09:20:59
上传
评论
收藏 13.21MB ZIP 举报
温馨提示
文档导出工具类,能将 Markdown 格式的内容转换为 Office Word、PDF、HTML 等格式的文档。即使不使用 Markdown 格式的内容,也可以直接调用 MD2File 的 API 生成 Word、PDF 等文档。另外,还可以将 MD2File 用作 Markdown 转 HTML 的工具类。
资源推荐
资源详情
资源评论
收起资源包目录
MD2File-master.zip (42个子文件)
MD2File-master
pom.xml 4KB
src
test
.gradle
buildOutputCleanup
cache.properties 49B
cache.properties.lock 2B
built.bin 0B
resources
md_for_test.md 3KB
java
com
yubenzi
test
ProduceTest.java 1019B
main
resources
MSYH.TTF 20.76MB
quote_char.jpg 2KB
java
com
youbenzi
md2
markdown
BlockType.java 212B
TextOrTable.java 698B
builder
CommonTextBuilder.java 654B
OrderedListBuilder.java 321B
HeaderBuilder.java 1KB
UnorderedListBuilder.java 327B
BlockBuilder.java 387B
ListBuilder.java 2KB
CodeBuilder.java 524B
QuoteBuilder.java 352B
Block.java 1KB
MDAnalyzer.java 17KB
ListBuilderCon.java 370B
LinkOrImageBeanTmp.java 775B
ValuePart.java 2KB
MDToken.java 2KB
export
Decorator.java 273B
PDFDecorator5x.java 9KB
FileFactory.java 3KB
DocDecorator.java 414B
builder
PDFDecoratorBuilder5x.java 357B
DecoratorBuilder.java 151B
HTMLDecoratorBuilder.java 259B
DocxDecoratorBuilder.java 329B
DocDecoratorBuilder.java 251B
DocxDecorator.java 11KB
BuilderFactory.java 2KB
HTMLDecorator.java 6KB
util
ImgHelper.java 4KB
MD2FileUtil.java 507B
MDUtil.java 2KB
LICENSE 10KB
build.gradle 820B
.gitignore 190B
共 42 条
- 1
资源评论
Java程序员-张凯
- 粉丝: 1w+
- 资源: 6651
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功