1 /**
2 	D syntax highlighting.
3 
4 	Copyright: © 2015 RejectedSoftware e.K.
5 	License: Subject to the terms of the MIT license, as written in the included LICENSE.txt file.
6 	Authors: Sönke Ludwig
7 */
8 module ddox.highlight;
9 
10 import std.algorithm : any;
11 import std.array : Appender, appender, replace;
12 import std.range;
13 import std.string : strip;
14 import std.uni : isLower, isUpper;
15 
16 
/**
	Converts a piece of D source code into HTML markup suitable for syntax
	highlighting.

	The generated text is wrapped in $(LT)span$(GT) elements whose class
	attribute names the kind of entity they contain. The class names match
	the ones used by Google's prettify library: "typ", "kwd", "com", "str",
	"lit", "pun" and "pln". One additional class, "spc", marks special token
	sequences that start with a "#", such as "#line" or "#!/bin/sh".

	Tokenization is done with the lexer of the libdparse package
	(`dparse.lexer`), so that package has to be available as a DUB
	dependency.

	---
	void main(string[] args)
	{
		#line 2
		import std.stdio; // yeah
		writefln("Hello, "~"World!");
		Package pack;
		ddox.entities.Module mod;
	}
	---

	Params:
		dst = Output range that receives the generated HTML
		code = The D source code to highlight
		ident_render = Optional delegate that customizes the rendering of
			(qualified) identifiers
*/
void highlightDCode(R)(ref R dst, string code, scope IdentifierRenderCallback ident_render = null)
	if (isOutputRange!(R, char))
{
	// Class of the span element that the implementation left open, if any.
	string open_class;
	highlightDCodeImpl(dst, code, ident_render, open_class);

	// The implementation never closes its final span, so do it here.
	if (open_class.length > 0)
		dst.put("</span>");
}
55 
/// ditto
string highlightDCode(string str, IdentifierRenderCallback ident_render = null)
{
	// Collect the HTML in memory and hand the finished string back.
	auto html = appender!string();
	highlightDCode(html, str, ident_render);
	return html.data;
}
63 
64 
/**
	Implementation behind `highlightDCode`.

	Lexes `code` and writes the HTML-escaped token text to `dst`, wrapped in
	$(LT)span$(GT) elements. In contrast to `highlightDCode`, the span that
	was opened last is NOT closed before returning; its class name is
	reported through `last_class` so that the caller can close it or keep
	appending to it.

	Params:
		dst = Output range that receives the generated HTML
		code = The D source code to process
		ident_render = Delegate used to render (qualified) identifiers, or
			`null` to highlight identifiers like any other token
		last_class = On entry, the class of the span that is currently open
			in `dst` (empty if none). On exit, the class of the span that
			was left open by this call (empty if none).
*/
void highlightDCodeImpl(R)(ref R dst, string code, scope IdentifierRenderCallback ident_render, ref string last_class)
	if (isOutputRange!(R, char))
{
	import dparse.lexer : DLexer, LexerConfig, StringBehavior, StringCache, WhitespaceBehavior,
		isBasicType, isKeyword, isStringLiteral, isNumberLiteral,
		isOperator, str, tok;
	import std.algorithm : endsWith;
	import std.string : stripRight;

	StringCache cache = StringCache(1024 * 4);

	// NOTE(review): presumably StringBehavior.source keeps literals exactly
	// as written in the source - confirm against libdparse.
	// Whitespace tokens are requested because the loop below writes them
	// to the output.
	LexerConfig config;
	config.stringBehavior = StringBehavior.source;
	config.whitespaceBehavior = WhitespaceBehavior.include;

	// Writes `text` HTML-escaped into a span of class `cls`. A new span is
	// only opened when the class differs from the currently open one, so
	// consecutive tokens of the same class share a single element.
	void writeWithClass(string text, string cls)
	{
		import std.format : formattedWrite;
		if (last_class != cls) {
			if (last_class.length) dst.put("</span>");
			dst.formattedWrite("<span class=\"%s\">", cls);
			last_class = cls;
		}

		// escape the characters that are significant in HTML text content
		foreach (char ch; text) {
			switch (ch) {
				default: dst.put(ch); break;
				case '&': dst.put("&amp;"); break;
				case '<': dst.put("&lt;");  break;
				case '>': dst.put("&gt;"); break;
			}
		}
	}


	// `symbol` holds the pending (qualified) identifier without whitespace,
	// `verbatim_symbol` holds the same tokens including any whitespace that
	// appeared between or after them. Both are only filled when
	// `ident_render` is set.
	auto symbol = appender!string;
	auto verbatim_symbol = appender!string;

	// Emits the pending symbol and resets both accumulators.
	void flushSymbol()
	{
		string vsym = verbatim_symbol.data.stripRight();
		if (vsym == ".") {
			// a lone dot is plain punctuation, preceded by a break opportunity
			dst.put("<wbr/>");
			writeWithClass(".", "pun");
		} else {
			// Let the callback decide how to render the identifier. The
			// delegate passed to it writes the highlighted identifier text.
			ident_render(symbol.data, (nested) {
				if (nested) {
					// Emit the identifier as a self-contained run of spans:
					// close the open span first and close the identifier's
					// own last span afterwards.
					if (last_class.length) dst.put("</span>");
					last_class = null;
					string internal_class;
					highlightDCodeImpl(dst, vsym, null, internal_class);
					if (internal_class.length) dst.put("</span>");
				} else highlightDCodeImpl(dst, vsym, null, last_class); // continue in the current span state
			});
		}
		// whitespace that followed the symbol is written after it
		if (vsym.length < verbatim_symbol.data.length)
			writeWithClass(verbatim_symbol.data[vsym.length .. $], last_class.length ? last_class : "pln");
		symbol = appender!string();
		verbatim_symbol = appender!string();
	}

	foreach (t; DLexer(cast(ubyte[])code, config, &cache)) {
		// Whitespace either becomes part of the pending symbol's verbatim
		// text or is written using the class of the currently open span.
		if (t.type == tok!"whitespace") {
			if (symbol.data.length) verbatim_symbol ~= t.text;
			else writeWithClass(t.text, last_class.length ? last_class : "pln");
			continue;
		}


		// Accumulate alternating "." and identifier tokens into a qualified
		// name. Any other token sequence flushes the pending symbol; the
		// token that triggered the flush is then rendered by the generic
		// chain below and is not accumulated itself.
		if (ident_render) {
			if (t.type == tok!"." && !symbol.data.endsWith(".")) {
				symbol ~= ".";
				verbatim_symbol ~= ".";
				continue;
			} else if (t.type == tok!"identifier" && (symbol.data.empty || symbol.data.endsWith("."))) {
				symbol ~= t.text;
				verbatim_symbol ~= t.text;
				continue;
			} else if (symbol.data.length) flushSymbol();
		}

		// allow a line break before each dot
		if (t.type == tok!".") dst.put("<wbr/>");

		// map the token type to one of the prettify-compatible class names
		if (isBasicType(t.type)) writeWithClass(str(t.type), "typ");
		else if (isKeyword(t.type)) writeWithClass(str(t.type), "kwd");
		else if (t.type == tok!"comment") writeWithClass(t.text, "com");
		else if (isStringLiteral(t.type) || t.type == tok!"characterLiteral") writeWithClass(t.text, "str");
		else if (isNumberLiteral(t.type)) writeWithClass(t.text, "lit");
		else if (isOperator(t.type)) writeWithClass(str(t.type), "pun");
		else if (t.type == tok!"specialTokenSequence" || t.type == tok!"scriptLine") writeWithClass(t.text, "spc");
		else if (t.text.strip == "string") writeWithClass(t.text, "typ");
		else if (t.type == tok!"identifier" && t.text.isCamelCase) writeWithClass(t.text, "typ");
		else if (t.type == tok!"identifier") writeWithClass(t.text, "pln");
		// NOTE(review): whitespace tokens already `continue` at the top of
		// the loop, so the following branch is never reached.
		else if (t.type == tok!"whitespace") writeWithClass(t.text, last_class.length ? last_class : "pln");
		else writeWithClass(t.text, "pun");
	}

	// emit a symbol that was still pending when the input ended
	if (symbol.data.length) flushSymbol();
}
164 
165 
/**
	Callback type used to customize how (qualified) identifiers are rendered.

	Params:
		ident = The identifier as accumulated by the highlighter, i.e. the
			identifier and "." tokens concatenated without the whitespace
			that may have separated them in the source
		insert_ident = Delegate that writes the highlighted identifier to
			the output. When called with `true`, the currently open span is
			closed first and the identifier is emitted as a self-contained
			run of spans; when called with `false`, the identifier is
			emitted as a continuation of the current span state.
*/
alias IdentifierRenderCallback = void delegate(string ident, scope void delegate(bool) insert_ident);
167 
/**
	Heuristic used to decide whether an identifier looks like a type name.

	Surrounding whitespace is ignored. The text qualifies if it is at least
	two code units long, its first character is an upper case letter and it
	contains at least one lower case letter.

	Params:
		text = The identifier text to test

	Returns:
		`true` if `text` matches the heuristic, `false` otherwise
*/
private bool isCamelCase(string text)
{
	text = text.strip();
	if (text.length < 2) return false;

	// Decode the first code point rather than indexing the first UTF-8 code
	// unit: for a non-ASCII letter, `text[0]` is only the lead byte, which
	// would be widened to an unrelated character (0xC3 -> 'Ã') and thus be
	// misclassified as upper case.
	if (!text.front.isUpper) return false;

	// `any` iterates the string by decoded code point
	return text.any!(ch => ch.isLower);
}