package com.google.android.util;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.Set;
import java.util.List;
/**
*
* Logic for parsing a text message typed by the user looking for smileys,
* urls, acronyms, formatting (e.g., '*'s for bold), me commands
* (e.g., "/me is asleep"), and punctuation.
*
* It constructs an array, which breaks the text up into its
* constituent pieces, which we return to the client.
*
*/
public abstract class AbstractMessageParser {
/**
 * The lookup data a message parser relies on: the URL schemes it knows
 * about plus tries of domain suffixes, smileys and acronyms.
 *
 * @author jessan (Jessan Hutchison-Quillian)
 */
public interface Resources {

  /**
   * Returns the known set of URL schemes.
   */
  Set<String> getSchemes();

  /**
   * Returns the possible values for the last part of a domain name.
   * Values are expected to be stored reversed in the trie.
   */
  TrieNode getDomainSuffixes();

  /**
   * Returns the smileys accepted by the parser.
   */
  TrieNode getSmileys();

  /**
   * Returns the acronyms accepted by the parser.
   */
  TrieNode getAcronyms();
}
/**
 * Supplies the resources this parser works from.
 *
 * <p>Subclasses must define the schemes, domains, smileys and acronyms
 * that are necessary for parsing. The parser fetches them through this
 * method while it parses; for example the smiley matcher asks for
 * {@code getResources().getSmileys()}.
 *
 * @return the resources to parse with
 */
protected abstract Resources getResources();
/**
 * Music note that indicates user is listening to a music track: the
 * character U+266B followed by a single space. A message is treated as a
 * music track only when it starts with this exact prefix.
 */
public static final String musicNote = "\u266B ";
// The text being parsed. parse() replaces it with the remainder of the
// message once a leading "/me" command has been stripped off.
private String text;
// Index into text of the next character to be consumed.
private int nextChar;
// Initialised to 10 by the constructor; its use is not visible in this
// part of the file. NOTE(review): confirm what the value 10 stands for.
private int nextClass;
// Parts produced by buildParts() from the token list.
private ArrayList<Part> parts;
// Tokens produced while parsing, in the order they were recognised.
private ArrayList<Token> tokens;
// Keyed by character; starts out empty. Presumably tracks the opening
// format markers -- its use is not visible here, verify before relying on it.
private HashMap<Character,Format> formatStart;
// Feature switches copied from the constructor arguments.
private boolean parseSmilies;
private boolean parseAcronyms;
private boolean parseFormatting;
private boolean parseUrls;
private boolean parseMeText;
private boolean parseMusic;
/**
 * Create a message parser to parse urls, formatting, acronyms, smileys,
 * /me text and music.
 *
 * <p>Equivalent to calling the seven-argument constructor with every
 * parsing flag set to {@code true}. Construction does not parse anything;
 * call {@link #parse()} afterwards.
 *
 * @param text the text to parse
 */
public AbstractMessageParser(String text) {
this(text, true, true, true, true, true, true);
}
/**
 * Creates a message parser, specifying the kinds of text to parse.
 *
 * <p>The constructor only records its arguments and sets up empty token,
 * part and format-start collections; nothing is parsed until
 * {@link #parse()} is called. Note that {@code parseMusic} comes before
 * {@code parseMeText} in the parameter list.
 *
 * @param text the text to parse
 * @param parseSmilies whether to look for smileys
 * @param parseAcronyms whether to look for acronyms
 * @param parseFormatting whether to look for formatting characters
 * @param parseUrls whether to look for urls
 * @param parseMusic whether to treat a message starting with
 *     {@link #musicNote} as a music track
 * @param parseMeText whether to recognise a leading "/me" command
 */
public AbstractMessageParser(String text, boolean parseSmilies,
    boolean parseAcronyms, boolean parseFormatting, boolean parseUrls,
    boolean parseMusic, boolean parseMeText) {
  // Feature switches, in parameter order.
  this.parseSmilies = parseSmilies;
  this.parseAcronyms = parseAcronyms;
  this.parseFormatting = parseFormatting;
  this.parseUrls = parseUrls;
  this.parseMusic = parseMusic;
  this.parseMeText = parseMeText;

  // Input and scan position.
  this.text = text;
  this.nextChar = 0;
  this.nextClass = 10;

  // Output collections start out empty.
  this.tokens = new ArrayList<Token>();
  this.parts = new ArrayList<Part>();
  this.formatStart = new HashMap<Character, Format>();
}
/**
 * Returns the raw text being parsed.
 *
 * <p>This is the parser's current text: once {@link #parse()} has
 * recognised a leading "/me" command, the four-character prefix is no
 * longer part of it.
 */
public final String getRawText() {
  return this.text;
}
/**
 * Returns how many parts the parsed text has been split into.
 *
 * @return the size of the part list
 */
public final int getPartCount() {
  return this.parts.size();
}
/**
 * Returns the part at the given index.
 *
 * @param index zero-based position in the part list
 * @return the part stored at {@code index}
 * @throws IndexOutOfBoundsException if {@code index} is outside the list
 */
public final Part getPart(int index) {
  return this.parts.get(index);
}
/**
 * Returns the list of parts from the parsed text.
 *
 * <p>The returned list is the parser's own list, not a copy.
 */
public final List<Part> getParts() {
  return this.parts;
}
/**
 * Parses the text string into an internal representation.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>If the message is a music track, it becomes a single token and
 *       parsing stops there.</li>
 *   <li>A leading "/me" followed by a whitespace character is cut off the
 *       text and remembered as the me-text.</li>
 *   <li>The remaining text is broken into tokens; at each position a
 *       smiley is tried first, then acronym, URL, formatting, and finally
 *       plain text.</li>
 *   <li>Html tokens next to media tokens are whitespace-trimmed and empty
 *       html tokens are dropped.</li>
 *   <li>The tokens are grouped into parts.</li>
 * </ol>
 *
 * @throws AssertionError if a token ended somewhere that is neither a word
 *     break nor, directly after a smiley, a smiley break
 */
public void parse() {
  // A music track (of which there would be only one, as the first token)
  // leaves nothing else to tokenize.
  if (parseMusicTrack()) {
    buildParts(null);
    return;
  }

  // Look for a me command: "/me" plus one whitespace character.
  String meText = null;
  boolean hasMePrefix = parseMeText
      && text.startsWith("/me")
      && (text.length() > 3)
      && Character.isWhitespace(text.charAt(3));
  if (hasMePrefix) {
    meText = text.substring(0, 4);
    text = text.substring(4);
  }

  // Break the text into tokens.
  boolean previousWasSmiley = false;
  while (nextChar < text.length()) {
    // Every token must start on a word break; right after a smiley a
    // smiley break is accepted as well.
    boolean atBreak = isWordBreak(nextChar)
        || (previousWasSmiley && isSmileyBreak(nextChar));
    if (!atBreak) {
      throw new AssertionError("last chunk did not end at word break");
    }

    previousWasSmiley = parseSmiley();
    if (!previousWasSmiley
        && !parseAcronym()
        && !parseURL()
        && !parseFormatting()) {
      parseText();
    }
  }

  // Trim the whitespace of the html tokens on either side of each media
  // token.
  // NOTE(review): the token before a media token gets its leading
  // whitespace trimmed and the token after gets its trailing whitespace
  // trimmed, which looks reversed relative to "before and after"; kept
  // exactly as it was -- confirm the intent.
  for (int i = 0; i < tokens.size(); ++i) {
    if (!tokens.get(i).isMedia()) {
      continue;
    }
    if (i > 0) {
      Token before = tokens.get(i - 1);
      if (before instanceof Html) {
        ((Html) before).trimLeadingWhitespace();
      }
    }
    if (i + 1 < tokens.size()) {
      Token after = tokens.get(i + 1);
      if (after instanceof Html) {
        ((Html) after).trimTrailingWhitespace();
      }
    }
  }

  // Remove any empty html tokens. The index only advances when nothing
  // was removed, so the element that slides into the slot is examined too.
  int index = 0;
  while (index < tokens.size()) {
    Token candidate = tokens.get(index);
    if (candidate.isHtml() && (candidate.toHtml(true).length() == 0)) {
      tokens.remove(index);
    } else {
      ++index;
    }
  }

  buildParts(meText);
}
/**
 * Gets the appropriate Token for a given URL.
 *
 * <p>The URL is offered to each media matcher in turn -- video, YouTube
 * video, photo, Flickr photo -- and the first one that recognises it
 * wins. A URL that none of them recognises becomes a plain {@code Link}.
 *
 * @param url the url
 * @param text the anchor text
 * @return {@code null} when {@code url} is {@code null}; otherwise the
 *     first matching media token, or a {@code Link}
 */
public static Token tokenForUrl(String url, String text) {
  if (url == null) {
    return null;
  }

  // Each matcher is only consulted if the previous ones returned null.
  Token match = Video.matchURL(url, text);
  if (match == null) {
    match = YouTubeVideo.matchURL(url, text);
  }
  if (match == null) {
    match = Photo.matchURL(url, text);
  }
  if (match == null) {
    match = FlickrPhoto.matchURL(url, text);
  }
  if (match != null) {
    return match;
  }

  // Not media, so must be a regular URL.
  return new Link(url, text);
}
/**
 * Builds the parts list from the token list.
 *
 * <p>Tokens are appended to the current last part. A new part is opened
 * first when the token is media, when there is no part yet, or when the
 * last part is media. After all tokens are placed, the first part (if
 * any) receives the me-text.
 *
 * @param meText any meText parsed from the message; may be {@code null}
 */
private void buildParts(String meText) {
  for (Token token : tokens) {
    boolean startsNewPart =
        token.isMedia() || parts.isEmpty() || lastPart().isMedia();
    if (startsNewPart) {
      parts.add(new Part());
    }
    lastPart().add(token);
  }

  // The first part inherits the meText of the line.
  if (!parts.isEmpty()) {
    parts.get(0).setMeText(meText);
  }
}
/**
 * Returns the last part in the list. The list must not be empty.
 */
private Part lastPart() {
  int lastIndex = parts.size() - 1;
  return parts.get(lastIndex);
}
/**
 * Looks for a music track: the text starts with {@link #musicNote} and
 * everything after that prefix is the track info.
 *
 * <p>On a match a single {@code MusicTrack} token is added and the scan
 * position is moved to the end of the text.
 *
 * @return {@code true} if music parsing is enabled and the text is a
 *     music track
 */
private boolean parseMusicTrack() {
  if (!parseMusic || !text.startsWith(musicNote)) {
    return false;
  }
  String trackInfo = text.substring(musicNote.length());
  addToken(new MusicTrack(trackInfo));
  nextChar = text.length();
  return true;
}
/** Consumes all of the text in the next word . */
private void parseText() {
StringBuilder buf = new StringBuilder();
int start = nextChar;
do {
char ch = text.charAt(nextChar++);
switch (ch) {
case '<': buf.append("<"); break;
case '>': buf.append(">"); break;
case '&': buf.append("&"); break;
case '"': buf.append("""); break;
case '\'': buf.append("'"); break;
case '\n': buf.append("<br>"); break;
default: buf.append(ch); break;
}
} while (!isWordBreak(nextChar));
addToken(new Html(text.substring(start, nextChar), buf.toString()));
}
/**
* Looks for smileys (e.g., ":)") in the text. The set of known smileys is
* loaded from a file into a trie at server start.
*/
private boolean parseSmiley() {
if(!parseSmilies) {
return false;
}
TrieNode match = longestMatch(getResources().getSmileys(), this, nextChar,
true);
if (match == null) {
return false;
} else {
int previousCharClass = getCharClass(nextChar -
没有合适的资源?快使用搜索试试~ 我知道了~
AbstractMessageParser.rar_Looking for Me
共1个文件
c:1个
1.该资源内容由用户上传,如若侵权请联系客服进行举报
2.虚拟产品一经售出概不退款(资源遇到问题,请及时私信上传者)
2.虚拟产品一经售出概不退款(资源遇到问题,请及时私信上传者)
版权申诉
0 下载量 27 浏览量
2022-09-19
21:21:34
上传
评论
收藏 10KB RAR 举报
温馨提示
Logic for parsing a text message typed by the user looking for smileys, urls, acronyms, formatting (e.g., '*'s for bold), me commands (e.g., "/me is asleep"), and punctuation.
资源推荐
资源详情
资源评论
收起资源包目录
AbstractMessageParser.rar (1个子文件)
AbstractMessageParser.c 43KB
共 1 条
- 1
资源评论
林当时
- 粉丝: 98
- 资源: 1万+
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功