1 import {Injectable} from '@angular/core';
// Loose pattern for text shaped like an HTML entity: an ampersand, then one or
// more non-whitespace characters, then a semicolon. It has no `g` flag and no
// anchors; in the lines visible here it is only used as a "does this string
// contain an entity at all?" check before doing DOM work.
3 const ENTITY_REGEX = /&[^\s]+;/;
6 * Translates HTML text into plain text.
10 export class HtmlToTxtService {
// Runs a fixed sequence of global string replacements over `text`, reassigning
// the parameter after each step.
//
// NOTE(review): as these lines read, every pattern and its replacement are the
// same character, so each call would leave the string unchanged. Given the
// method name, the patterns were presumably HTML entity names (escaped
// ampersand, double quote, non-breaking space, less-than, greater-than) that
// were decoded when this excerpt was produced -- confirm against the real file.
// NOTE(review): if the first pattern is indeed the escaped ampersand, running
// it first means doubly-escaped input gets unescaped twice by the later
// replacements; verify whether that is intended.
// NOTE(review): the statement returning the result and the closing brace are
// not visible in this excerpt.
12 unEscapeHtml(text: string): string {
13 text = text.replace(/&/g, '&');
14 text = text.replace(/"/g, '"');
15 text = text.replace(/ /g, ' ');
16 text = text.replace(/</g, '<');
17 text = text.replace(/>/g, '>');
21 // https://stackoverflow.com/questions/7394748
// Takes a string that may contain HTML entities. The DOM is only touched when
// `text` is truthy and contains something matching ENTITY_REGEX.
//
// NOTE(review): the lines that read the decoded value back from the element,
// the fall-through return for non-matching input, and the closing braces are
// not visible in this excerpt.
22 entityToChars(text: string): string {
23 if (text && text.match(ENTITY_REGEX)) {
// A <textarea> element is created (it is not attached to the document in the
// lines shown) and `text` is assigned to its innerHTML so the browser's HTML
// parser processes the entities.
24 const node = document.createElement('textarea');
25 node.innerHTML = text;
31 // Translate an HTML string into plain text.
32 // Removes HTML elements.
33 // Replaces <li> with "*"
34 // Replaces HTML entities with their character equivalent.
// Converts an HTML string to plain text: HTML comments are removed from the
// whole input, the input is split on "\n", each line goes through the two
// decoding helpers and a fixed series of tag replacements, and the non-empty
// results are joined with "\n".
//
// NOTE(review): several statements (the body of the empty-input guard, the
// declaration of `newLines`, closing braces) are not visible in this excerpt.
35 htmlToTxt(html: string): string {
// Guard for missing input. `!html` is already true for the empty string, so
// the second comparison is redundant.
36 if (!html || html === '') {
40 // First remove multi-line comments.
// The `s` flag lets `.` match newlines, so comments spanning several lines are
// removed here, before the per-line split; `.*?` stops at the first "-->".
41 html = html.replace(/<!--(.*?)-->/gs, '');
43 const lines = html.split(/\n/);
46 lines.forEach(line => {
// Decoding runs BEFORE the tag replacements below.
// NOTE(review): if these helpers decode entities as their names suggest,
// escaped markup in the text (an escaped "<b>", say) becomes a literal tag
// here and is then stripped by the replacements that follow -- confirm that
// this is the intended order.
53 line = this.unEscapeHtml(line);
54 line = this.entityToChars(line);
// Everything from here on operates on a single line and `.` does not match
// newlines without the `s` flag, so the <head> removal only succeeds when the
// opening and closing tags sit on the same line.
56 line = line.replace(/<head.*?>.*?<\/head>/gi, '');
57 line = line.replace(/<br.*?>/gi, '\r\n');
58 line = line.replace(/<table.*?>/gi, '');
59 line = line.replace(/<\/tr>/gi, '\r\n'); // end of row
60 line = line.replace(/<\/td>/gi, ' '); // end of cell
61 line = line.replace(/<\/th>/gi, ' '); // end of th
62 line = line.replace(/<tr.*?>/gi, '');
63 line = line.replace(/<hr.*?>/gi, '\r\n');
64 line = line.replace(/<p.*?>/gi, '');
65 line = line.replace(/<block.*?>/gi, '');
// NOTE(review): these patterns are prefix matches on the tag name, not exact
// matches. For the ones replaced with '' that is harmless because the
// catch-all below strips every remaining tag anyway, but `<li.*?>` also
// matches tags such as <link ...>, which would emit a spurious " * ".
66 line = line.replace(/<li.*?>/gi, ' * ');
// Catch-all: drop any tag that is still present.
67 line = line.replace(/<.+?>/gi, '');
// Lines that end up empty are dropped from the output.
69 if (line) { newLines.push(line); }
// NOTE(review): the breaks inserted above are "\r\n" while lines are joined
// with "\n", so the result can contain mixed line endings.
72 return newLines.join('\n');