1 /**
2 	D syntax highlighting.
3 
4 	Copyright: © 2015 RejectedSoftware e.K.
5 	License: Subject to the terms of the MIT license, as written in the included LICENSE.txt file.
6 	Authors: Sönke Ludwig
7 */
8 module ddox.highlight;
9 
10 import std.algorithm : any;
11 import std.array : Appender, appender, replace;
12 import std.range;
13 import std.string : strip;
14 import std.uni : isLower, isUpper;
15 
16 
/**
	Converts a piece of D code into a sequence of HTML elements suitable for
	syntax highlighting.

	Every token is emitted inside a $(LT)span$(GT) element whose class
	attribute names the kind of entity it contains. The class names are kept
	compatible with those of Google's prettify library: "typ", "kwd", "com",
	"str", "lit", "pun", "pln", "spc"

	"spc" is the only addition. It marks a special token sequence starting
	with a "#", such as "#line" or "#!/bin/sh".

	Tokenization is delegated to the lexer of the libdparse package
	(`dparse.lexer`).

	---
	void main(string[] args)
	{
		#line 2
		import std.stdio; // yeah
		writefln("Hello, "~"World!");
		Package pack;
		ddox.entities.Module mod;
	}
	---

	Params:
		dst = Output range that receives the generated HTML
		code = The D source code to process
		ident_render = Optional delegate to customize how (qualified)
			identifiers are rendered
*/
void highlightDCode(R)(ref R dst, string code, scope IdentifierRenderCallback ident_render = null)
	if (isOutputRange!(R, char))
{
	// Class of the <span> that the implementation leaves open, if any.
	string open_class;
	highlightDCodeImpl(dst, code, ident_render, open_class);

	// The implementation does not close the final span itself, so do it here.
	if (open_class.length > 0)
		dst.put("</span>");
}
55 
/// ditto
string highlightDCode(string str, scope IdentifierRenderCallback ident_render = null)
{
	// `ident_render` is declared `scope`, like in the output range overload
	// this function forwards to: the delegate is only used for the duration
	// of the call and is never stored.
	auto dst = appender!string();
	dst.highlightDCode(str, ident_render);
	return dst.data;
}
63 
unittest {
	// Callback that renders every identifier unchanged, in normal mode.
	void passthrough(string ident, scope void delegate(IdentifierRenderMode, size_t) insert)
	{
		insert(IdentifierRenderMode.normal, 0);
	}

	// Pairs of (input code, expected HTML) covering "@" handling: built-in
	// attributes are keywords, anything else is punctuation plus identifier.
	static immutable string[2][] cases = [
		["@safe", `<span class="kwd">@safe</span>`],
		["@safe foo", `<span class="kwd">@safe </span><span class="pln">foo</span>`],
		["@path", `<span class="pun">@</span><span class="pln">path</span>`],
		["@path foo", `<span class="pun">@</span><span class="pln">path foo</span>`],
		["@path(", `<span class="pun">@</span><span class="pln">path</span><span class="pun">(</span>`],
		["@.path", `<span class="pun">@<wbr/>.</span><span class="pln">path</span>`],
		["@ path", `<span class="pun">@ </span><span class="pln">path</span>`],
	];

	// The output must be the same with a pass-through callback...
	foreach (c; cases)
		assert(highlightDCode(c[0], &passthrough) == c[1]);

	// ...and without any callback at all.
	foreach (c; cases)
		assert(highlightDCode(c[0]) == c[1]);
}
82 
83 
/**
	Delegate type used to customize the rendering of (qualified) identifiers.

	The delegate receives the identifier in `ident` (dot separated parts,
	without any whitespace that surrounded them in the source) together with
	the `insert_ident` delegate. Calling `insert_ident` writes the highlighted
	identifier to the output; `mode` selects how it is embedded (see
	`IdentifierRenderMode`) and `nskip` is the number of leading dot-terminated
	parts to leave out of the written text.
*/
alias IdentifierRenderCallback = void delegate(
	string ident,
	scope void delegate(IdentifierRenderMode mode, size_t nskip) insert_ident
);
85 
/// Selects how an identifier is written by the `insert_ident` delegate of an `IdentifierRenderCallback`.
enum IdentifierRenderMode {
	/// The identifier is highlighted as part of the running sequence of
	/// spans, i.e. it may continue the span that is currently open.
	normal,

	/// Any open span is closed first; the identifier is then highlighted
	/// with its own, self-contained sequence of spans.
	nested,
}
90 
/**
	Lexes `code` with libdparse's `DLexer` and writes the tokens to `dst`,
	wrapped in `<span class="...">` elements.

	Consecutive tokens of the same class share a single span. The span that is
	still open when the function returns is not closed here; its class is
	reported through `last_class` so that the caller can either keep writing
	into it or emit the closing tag.

	Params:
		dst = Output range receiving the HTML
		code = D source text to highlight
		ident_render = If non-null, (qualified) identifiers are collected and
			handed to this delegate instead of being written directly
		last_class = On entry, the class of the span currently open in `dst`
			(empty if none); on exit, the class of the span left open
*/
private void highlightDCodeImpl(R)(ref R dst, string code, scope IdentifierRenderCallback ident_render, ref string last_class)
	if (isOutputRange!(R, char))
{
	import dparse.lexer : DLexer, LexerConfig, StringBehavior, StringCache, WhitespaceBehavior,
		isBasicType, isKeyword, isStringLiteral, isNumberLiteral,
		isOperator, str, tok;
	import std.algorithm : endsWith;
	import std.string : indexOf, stripRight;

	StringCache cache = StringCache(1024 * 4);

	// Keep string literals as written in the source and have the lexer emit
	// whitespace tokens, so that the input text can be reproduced.
	LexerConfig config;
	config.stringBehavior = StringBehavior.source;
	config.whitespaceBehavior = WhitespaceBehavior.include;

	// Writes `text` HTML-escaped, opening a new span (and closing the
	// previous one) only if `cls` differs from the currently open class.
	void writeWithClass(string text, string cls)
	{
		import std.format : formattedWrite;
		if (last_class != cls) {
			if (last_class.length) dst.put("</span>");
			dst.formattedWrite("<span class=\"%s\">", cls);
			last_class = cls;
		}

		foreach (char ch; text) {
			switch (ch) {
				default: dst.put(ch); break;
				case '&': dst.put("&amp;"); break;
				case '<': dst.put("&lt;");  break;
				case '>': dst.put("&gt;"); break;
			}
		}
	}


	// `symbol` accumulates the parts of a (qualified) identifier without
	// whitespace, `verbatim_symbol` the same tokens including any whitespace
	// that followed them in the source. Only used if `ident_render` is set.
	auto symbol = appender!string;
	auto verbatim_symbol = appender!string;

	// Emits the collected symbol through `ident_render` and resets the
	// accumulators. Must only be called with a non-null `ident_render`.
	void flushSymbol()
	{
		string vsym = verbatim_symbol.data.stripRight();
		if (vsym == ".") {
			// A lone dot is not an identifier, write it as punctuation.
			dst.put("<wbr/>");
			writeWithClass(".", "pun");
		} else {
			ident_render(symbol.data, (IdentifierRenderMode mode, size_t nskip) {
				// Drop the first `nskip` dot-terminated parts.
				string dsym = vsym;
				while (nskip-- > 0) {
					auto idx = dsym.indexOf('.');
					// No dot left: further iterations could not change
					// `dsym` anymore, so stop instead of spinning.
					if (idx < 0) break;
					dsym = dsym[idx+1 .. $];
				}
				final switch (mode) with (IdentifierRenderMode) {
					case normal:
						// Continue the running span sequence.
						highlightDCodeImpl(dst, dsym, null, last_class);
						break;
					case nested:
						// Close the running span and render the identifier
						// with its own, fully closed span sequence.
						if (last_class.length) dst.put("</span>");
						last_class = null;
						string internal_class;
						highlightDCodeImpl(dst, dsym, null, internal_class);
						if (internal_class.length) dst.put("</span>");
						break;
				}
			});
		}
		// Whitespace that trailed the symbol is written after it.
		if (vsym.length < verbatim_symbol.data.length)
			writeWithClass(verbatim_symbol.data[vsym.length .. $], last_class.length ? last_class : "pln");
		symbol = appender!string();
		verbatim_symbol = appender!string();
	}

	// Set after an "@" token; the "@" itself is written once the following
	// token is known, because its class depends on that token.
	bool last_was_at = false;

	// NOTE(review): the cast drops the immutability of `code`; presumably the
	// lexer only reads from the buffer - confirm against libdparse.
	foreach (t; DLexer(cast(ubyte[])code, config, &cache)) {
		if (last_was_at) {
			last_was_at = false;
			switch (t.text) {
				default: writeWithClass("@", "pun"); break;
				case "property", "safe", "trusted", "system", "disable", "nogc":
					// Built-in attribute: "@" and name form a single keyword.
					writeWithClass("@", "kwd");
					writeWithClass(t.text, "kwd");
					continue;
			}
		}

		if (t.type == tok!"whitespace") {
			// Whitespace extends the currently open span, or is buffered
			// while a symbol is being collected.
			if (symbol.data.length) verbatim_symbol ~= t.text;
			else writeWithClass(t.text, last_class.length ? last_class : "pln");
			continue;
		}


		if (ident_render) {
			if (t.type == tok!"." && !symbol.data.endsWith(".")) {
				symbol ~= ".";
				verbatim_symbol ~= ".";
				continue;
			} else if (t.type == tok!"identifier" && (symbol.data.empty || symbol.data.endsWith("."))) {
				symbol ~= t.text;
				verbatim_symbol ~= t.text;
				continue;
			} else if (symbol.data.length) flushSymbol();
			// Note: an identifier that directly follows a complete symbol
			// (such as `pack` in `Package pack`) ends up here and is written
			// below without going through `ident_render`.
		}

		if (t.type == tok!".") dst.put("<wbr/>");

		// Whitespace tokens never reach this point (handled above).
		if (t.type == tok!"@") last_was_at = true;
		else if (isBasicType(t.type)) writeWithClass(str(t.type), "typ");
		else if (isKeyword(t.type)) writeWithClass(str(t.type), "kwd");
		else if (t.type == tok!"comment") writeWithClass(t.text, "com");
		else if (isStringLiteral(t.type) || t.type == tok!"characterLiteral") writeWithClass(t.text, "str");
		else if (isNumberLiteral(t.type)) writeWithClass(t.text, "lit");
		else if (isOperator(t.type)) writeWithClass(str(t.type), "pun");
		else if (t.type == tok!"specialTokenSequence" || t.type == tok!"scriptLine") writeWithClass(t.text, "spc");
		else if (t.text.strip == "string") writeWithClass(t.text, "typ");
		else if (t.type == tok!"identifier" && t.text.isCamelCase) writeWithClass(t.text, "typ");
		else if (t.type == tok!"identifier") writeWithClass(t.text, "pln");
		else writeWithClass(t.text, "pun");
	}

	// A trailing "@" has no following token that would have triggered its output.
	if (last_was_at) writeWithClass("@", "pun");

	if (symbol.data.length) flushSymbol();
}
216 
/**
	Heuristic used to decide whether an identifier looks like a type name.

	After stripping surrounding whitespace, the text must be at least two
	code units long, start with an upper-case character and contain at least
	one lower-case character.
*/
private bool isCamelCase(string text)
{
	text = text.strip();
	if (text.length < 2) return false;
	// Decode the first code point instead of inspecting the first UTF-8 code
	// unit: the lead byte of a multi-byte sequence would otherwise be
	// classified as if it were a character of its own.
	if (!text.front.isUpper) return false;
	return text.any!(ch => ch.isLower);
}
225