1 /**
2 	D syntax highlighting.
3 
4 	Copyright: © 2015 RejectedSoftware e.K.
5 	License: Subject to the terms of the MIT license, as written in the included LICENSE.txt file.
6 	Authors: Sönke Ludwig
7 */
8 module ddox.highlight;
9 
10 import std.algorithm : any;
11 import std.array : Appender, appender, replace;
12 import std.range;
13 import std..string : strip;
14 import std.uni : isLower, isUpper;
15 
16 
/**
	Converts D source code into a sequence of HTML elements that can be
	styled for syntax highlighting.

	Every piece of the input is wrapped in a $(LT)span$(GT) element whose
	class attribute names the kind of entity it contains. The class names
	follow the ones used by Google's prettify library: "typ", "kwd",
	"com", "str", "lit", "pun", "pln", "spc"

	"spc" is the only class that is not taken from prettify. It marks special
	token sequences that start with a "#", such as "#line" or "#!/bin/sh".

	Note that this function will only perform actual syntax highlighting if
	the libdparse package is available as a DUB dependency.

	---
	void main(string[] args)
	{
		#line 2
		import std.stdio; // yeah
		writefln("Hello, "~"World!");
		Package pack;
		ddox.entities.Module mod;
	}
	---

	Params:
		dst = Output range that receives the generated HTML
		code = The D source code to highlight
		ident_render = Optional delegate that customizes how (qualified)
			identifiers are rendered
*/
void highlightDCode(R)(ref R dst, string code, scope IdentifierRenderCallback ident_render = null)
	if (isOutputRange!(R, char))
{
	// Class of the span that the implementation leaves open, if any.
	string open_class;
	highlightDCodeImpl(dst, code, ident_render, open_class);

	// The implementation never terminates the final span itself.
	if (open_class.length > 0)
		dst.put("</span>");
}
55 
/// ditto
string highlightDCode(string str, IdentifierRenderCallback ident_render = null)
{
	// Collect the markup in memory and hand the result back as a string.
	auto html = appender!string();
	highlightDCode(html, str, ident_render);
	return html.data;
}
63 
unittest {
	// Callback that always requests plain inline rendering of the identifier.
	void inline_render(string ident, scope void delegate(bool) insert) { insert(false); }

	// Each entry pairs an input snippet with the markup expected for it.
	static immutable string[2][] cases = [
		["@safe", `<span class="kwd">@safe</span>`],
		["@safe foo", `<span class="kwd">@safe </span><span class="pln">foo</span>`],
		["@path", `<span class="pun">@</span><span class="pln">path</span>`],
		["@path foo", `<span class="pun">@</span><span class="pln">path foo</span>`],
		["@path(", `<span class="pun">@</span><span class="pln">path</span><span class="pun">(</span>`],
		["@.path", `<span class="pun">@<wbr/>.</span><span class="pln">path</span>`],
		["@ path", `<span class="pun">@ </span><span class="pln">path</span>`],
	];

	// With an identifier callback installed...
	foreach (c; cases)
		assert(highlightDCode(c[0], &inline_render) == c[1]);

	// ...and without one, the output is expected to be the same.
	foreach (c; cases)
		assert(highlightDCode(c[0]) == c[1]);
}
82 
83 
/**
	Delegate type used to customize how identifiers are emitted by
	`highlightDCode`.

	The highlighter calls the delegate once per collected identifier chain
	(identifiers joined by "."), passing the chain without whitespace in
	`ident`. The highlighted identifier is only written to the output when
	the delegate invokes `insert_ident`.

	Passing `true` to `insert_ident` closes any span that is currently open
	before the identifier is written and closes the identifier's own span
	afterwards. Passing `false` writes the identifier as a continuation of
	the surrounding span sequence.
*/
alias IdentifierRenderCallback = void delegate(string ident, scope void delegate(bool) insert_ident);
85 
/**
	Implementation shared by the public `highlightDCode` overloads.

	Lexes `code` with libdparse's `DLexer` (configured with
	`StringBehavior.source` and `WhitespaceBehavior.include`) and writes one
	$(LT)span$(GT) per run of tokens that share the same class. The last span
	that was opened is intentionally left open; the caller is responsible for
	emitting the final closing tag when `last_class` is non-empty on return.

	Params:
		dst = Output range receiving the HTML
		code = D source code to highlight
		ident_render = Optional callback; when set, chains of identifiers
			joined by "." are collected and handed to it instead of being
			written directly
		last_class = In/out: class of the span currently open in `dst`, or
			empty if no span is open
*/
private void highlightDCodeImpl(R)(ref R dst, string code, scope IdentifierRenderCallback ident_render, ref string last_class)
	if (isOutputRange!(R, char))
{
	import dparse.lexer : DLexer, LexerConfig, StringBehavior, StringCache, WhitespaceBehavior,
		isBasicType, isKeyword, isStringLiteral, isNumberLiteral,
		isOperator, str, tok;
	import std.algorithm : endsWith;
	import std.string : stripRight;

	StringCache cache = StringCache(1024 * 4);

	LexerConfig config;
	config.stringBehavior = StringBehavior.source;
	config.whitespaceBehavior = WhitespaceBehavior.include;

	// Writes `text` HTML-escaped. A new span is only opened when the class
	// differs from the one that is currently open, so adjacent tokens of the
	// same class end up in a single span.
	void writeWithClass(string text, string cls)
	{
		import std.format : formattedWrite;
		if (last_class != cls) {
			if (last_class.length) dst.put("</span>");
			dst.formattedWrite("<span class=\"%s\">", cls);
			last_class = cls;
		}

		foreach (char ch; text) {
			switch (ch) {
				default: dst.put(ch); break;
				case '&': dst.put("&amp;"); break;
				case '<': dst.put("&lt;");  break;
				case '>': dst.put("&gt;"); break;
			}
		}
	}


	// `symbol` holds the pending identifier chain without whitespace, while
	// `verbatim_symbol` holds the same chain including any whitespace that
	// appeared in the source. Both are only filled when `ident_render` is set.
	auto symbol = appender!string;
	auto verbatim_symbol = appender!string;

	// Emits the pending identifier chain through `ident_render` and resets
	// both buffers.
	void flushSymbol()
	{
		string vsym = verbatim_symbol.data.stripRight();
		if (vsym == ".") {
			// A lone dot is not an identifier; write it as punctuation.
			dst.put("<wbr/>");
			writeWithClass(".", "pun");
		} else {
			ident_render(symbol.data, (nested) {
				if (nested) {
					// Render the identifier as a self-contained span
					// sequence: close what is open, and close our own span
					// afterwards.
					if (last_class.length) dst.put("</span>");
					last_class = null;
					string internal_class;
					highlightDCodeImpl(dst, vsym, null, internal_class);
					if (internal_class.length) dst.put("</span>");
				} else highlightDCodeImpl(dst, vsym, null, last_class);
			});
		}
		// Trailing whitespace was stripped from `vsym`; write it back so that
		// no input characters get lost.
		if (vsym.length < verbatim_symbol.data.length)
			writeWithClass(verbatim_symbol.data[vsym.length .. $], last_class.length ? last_class : "pln");
		symbol = appender!string();
		verbatim_symbol = appender!string();
	}

	// Set after an "@" token. The "@" itself is written once the following
	// token is known, because its class depends on that token.
	bool last_was_at = false;

	foreach (t; DLexer(cast(ubyte[])code, config, &cache)) {
		if (last_was_at) {
			last_was_at = false;
			switch (t.text) {
				default: writeWithClass("@", "pun"); break;
				case "property", "safe", "trusted", "system", "disable", "nogc":
					// Built-in attribute: "@" and name form a single keyword.
					writeWithClass("@", "kwd");
					writeWithClass(t.text, "kwd");
					continue;
			}
		}

		if (t.type == tok!"whitespace") {
			// Whitespace inside a pending identifier chain is buffered;
			// otherwise it is appended to whatever span is currently open.
			if (symbol.data.length) verbatim_symbol ~= t.text;
			else writeWithClass(t.text, last_class.length ? last_class : "pln");
			continue;
		}


		if (ident_render) {
			if (t.type == tok!"." && !symbol.data.endsWith(".")) {
				symbol ~= ".";
				verbatim_symbol ~= ".";
				continue;
			} else if (t.type == tok!"identifier" && (symbol.data.empty || symbol.data.endsWith("."))) {
				symbol ~= t.text;
				verbatim_symbol ~= t.text;
				continue;
			} else if (symbol.data.length) flushSymbol();
		}

		// Insert a line break opportunity in front of every dot.
		if (t.type == tok!".") dst.put("<wbr/>");

		// Whitespace tokens never reach this point (handled above), so the
		// chain below needs no whitespace case.
		if (t.type == tok!"@") last_was_at = true;
		else if (isBasicType(t.type)) writeWithClass(str(t.type), "typ");
		else if (isKeyword(t.type)) writeWithClass(str(t.type), "kwd");
		else if (t.type == tok!"comment") writeWithClass(t.text, "com");
		else if (isStringLiteral(t.type) || t.type == tok!"characterLiteral") writeWithClass(t.text, "str");
		else if (isNumberLiteral(t.type)) writeWithClass(t.text, "lit");
		else if (isOperator(t.type)) writeWithClass(str(t.type), "pun");
		else if (t.type == tok!"specialTokenSequence" || t.type == tok!"scriptLine") writeWithClass(t.text, "spc");
		else if (t.text.strip == "string") writeWithClass(t.text, "typ");
		else if (t.type == tok!"identifier" && t.text.isCamelCase) writeWithClass(t.text, "typ");
		else if (t.type == tok!"identifier") writeWithClass(t.text, "pln");
		else writeWithClass(t.text, "pun");
	}

	// An "@" at the very end of the input has not been written yet.
	if (last_was_at) writeWithClass("@", "pun");

	if (symbol.data.length) flushSymbol();
}
201 
/**
	Heuristic used to decide whether an identifier looks like a type name.

	Surrounding whitespace is ignored. The text qualifies if it is at least
	two UTF-8 code units long, its first character is an upper case letter
	and it contains at least one lower case letter.

	Params:
		text = The identifier text to test

	Returns:
		`true` if `text` matches the heuristic, `false` otherwise
*/
private bool isCamelCase(string text)
{
	import std.range.primitives : front;

	text = text.strip();
	if (text.length < 2) return false;
	// Decode the first code point instead of indexing `text[0]`: indexing
	// yields only the first UTF-8 code unit, and the lead byte of a
	// multi-byte character (e.g. 0xC3 or 0xD0) would be misread as an upper
	// case Latin-1 letter.
	if (!text.front.isUpper) return false;
	if (!text.any!(ch => ch.isLower)) return false;
	return true;
}
210