我有一个任务:需要对 HTML 中的 div 进行格式化,
例如把下面这样的 HTML:
<div>
<h4>Some Title</h4>
<p><a href="somelink.com"><strong>Bold and then</strong> normal</a></p>
<p><a href="somelink2.com">Just normal</a></p>
</div>
变成这样
#### Some Title
[**Bold and then** normal](somelink.com) //two spaces after <a> tag for line breaks
[Just normal](somelink2.com)
我有一个函数可以替换所有的 <strong> 和 <em> 标签,但我不知道该如何处理 <a> 标签。
/**
 * Replaces <em>/<strong> tags in an HTML string with Markdown emphasis markers.
 *
 * Opening and closing <em> tags become `*`; opening and closing <strong> tags
 * become `**`. All other markup is returned unchanged.
 *
 * @param {string} text - HTML markup to convert.
 * @returns {string} The markup with emphasis tags replaced.
 */
const toMarkdown = (text) => {
  // The optional slash lets one pattern cover both the opening and closing tag.
  const emphasisTag = /<\/?em>/g;
  const strongTag = /<\/?strong>/g;
  return text.replace(emphasisTag, '*').replace(strongTag, '**');
};
// Sample input: a heading followed by two paragraphs, each wrapping a link;
// the first link also contains a <strong> element.
const html = `<div>
<h4>Some Title</h4>
<p><a href="somelink.com"><strong>Bold and then</strong> normal</a></p>
<p><a href="somelink2.com">Just normal</a></p>
</div>`
/**
 * Swaps <em> and <strong> tags for their Markdown markers (`*` and `**`).
 * Every other tag in the input is left as-is.
 *
 * @param {string} text - HTML markup to convert.
 * @returns {string} The markup with emphasis tags replaced.
 */
const toMarkdown = (text) => {
  // Each entry pairs a tag pattern with the Markdown marker that replaces it.
  const substitutions = [
    [/<em>/g, '*'],
    [/<\/em>/g, '*'],
    [/<strong>/g, '**'],
    [/<\/strong>/g, '**'],
  ];
  return substitutions.reduce(
    (converted, [pattern, marker]) => converted.replace(pattern, marker),
    text
  );
};
// Convert the sample markup and print the result.
const converted = toMarkdown(html);
console.log(converted);
我会这样做
// Sample input: a heading and two linked paragraphs; the first paragraph also
// has an <em> element after its link.
const html = `<div><h4>Some Title</h4>
<p><a href="somelink.com"><strong>Bold and then</strong> normal</a> <em>bold</em></p>
<p><a href="somelink2.com">Just normal</a></p>
</div>`
/**
 * Converts a small subset of HTML (<em>, <strong>, <a>, <h4>, <p>, <div>) to
 * Markdown. The markup is parsed into a detached <div>, then every supported
 * element is replaced by a text node holding its Markdown form. Elements with
 * any other tag are left in place untouched.
 *
 * Requires a DOM (`document`). Only pass trusted markup: assigning `innerHTML`
 * parses it, which can still trigger resource loads and inline event handlers
 * (e.g. `onerror`) even though the container is never attached to the page.
 *
 * @param {string} text - HTML markup to convert.
 * @returns {HTMLDivElement} The detached container whose contents are now the
 *   Markdown text (read it via `.innerHTML` or `.textContent`).
 */
const toMarkdown = (text) => {
  const container = document.createElement("div");
  container.innerHTML = text;

  // Markdown rendering for each supported tag, keyed by lower-case tag name.
  const renderers = {
    em: (ele) => `*${ele.textContent}*`,
    strong: (ele) => `**${ele.textContent}**`,
    // getAttribute keeps the href exactly as authored; the `href` property
    // would return the URL resolved against the current page.
    a: (ele) => `[${ele.textContent}](${ele.getAttribute("href") || ""})`,
    // An ATX heading needs a space between the hashes and the title.
    h4: (ele) => `#### ${ele.textContent}`,
    // Two trailing spaces make the following newline a Markdown hard break.
    p: (ele) => `${ele.textContent}  `,
    div: (ele) => ele.textContent,
  };
  const selector = Object.keys(renderers).join(", ");

  // querySelectorAll yields document order (ancestors before descendants).
  // Walking it in reverse converts the innermost elements first, so nested
  // formatting such as <em><strong>x</strong></em> is preserved.
  [...container.querySelectorAll(selector)].reverse().forEach((ele) => {
    const render = renderers[ele.tagName.toLowerCase()];
    ele.replaceWith(document.createTextNode(render(ele)));
  });

  return container;
};
// toMarkdown returns the container element, so log its serialized contents.
const { innerHTML: markdown } = toMarkdown(html);
console.log(markdown);