You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 

712 lines
4.5 KiB

  # Filter the entity table stored after __DATA__.
  #
  # The table is a sequence of records, each made of: an optional literal
  # character line, a numeric reference line (&#NNN;), a named reference
  # line (&name;) and one description line.  Every line is echoed to STDOUT
  # except the description; a blank line is printed after each record in
  # its place.
  use strict;
  use warnings;

  my $entities_seen    = 0;    # entity references seen in the current record
  my $skip_description = 0;    # true when the next content line must be dropped

  LINE:
  while (my $line = <DATA>) {
      # Lines that begin with whitespace (this includes empty lines) are
      # ignored and do not consume a pending description skip.
      next LINE if $line =~ /^\s/;

      if ($skip_description) {
          $skip_description = 0;
          next LINE;
      }

      # A leading "&#" or "&" followed by a word character marks an entity
      # reference; a bare "&" (the literal ampersand row) does not count.
      $entities_seen++ if $line =~ /^&[#\w]/;

      print $line;

      # Both references have been printed: close the record with a blank
      # line and arrange for its description line to be dropped.
      if ($entities_seen == 2) {
          $entities_seen    = 0;
          $skip_description = 1;
          print "\n";
      }
  }
  21. __DATA__
  22. "
  23. &#34;
  24. &quot;
  25. quotation mark
  26. '
  27. &#39;
  28. &apos;
  29. apostrophe 
  30. &
  31. &#38;
  32. &amp;
  33. ampersand
  34. <
  35. &#60;
  36. &lt;
  37. less-than
  38. >
  39. &#62;
  40. &gt;
  41. greater-than
  42. ¡
  43. &#161;
  44. &iexcl;
  45. inverted exclamation mark
  46. ¢
  47. &#162;
  48. &cent;
  49. cent
  50. £
  51. &#163;
  52. &pound;
  53. pound
  54. ¤
  55. &#164;
  56. &curren;
  57. currency
  58. ¥
  59. &#165;
  60. &yen;
  61. yen
  62. ¦
  63. &#166;
  64. &brvbar;
  65. broken vertical bar
  66. §
  67. &#167;
  68. &sect;
  69. section
  70. ¨
  71. &#168;
  72. &uml;
  73. spacing diaeresis
  74. ©
  75. &#169;
  76. &copy;
  77. copyright
  78. ª
  79. &#170;
  80. &ordf;
  81. feminine ordinal indicator
  82. «
  83. &#171;
  84. &laquo;
  85. angle quotation mark (left)
  86. ¬
  87. &#172;
  88. &not;
  89. negation
  90. ­
  91. &#173;
  92. &shy;
  93. soft hyphen
  94. ®
  95. &#174;
  96. &reg;
  97. registered trademark
  98. ¯
  99. &#175;
  100. &macr;
  101. spacing macron
  102. °
  103. &#176;
  104. &deg;
  105. degree
  106. ±
  107. &#177;
  108. &plusmn;
  109. plus-or-minus 
  110. ²
  111. &#178;
  112. &sup2;
  113. superscript 2
  114. ³
  115. &#179;
  116. &sup3;
  117. superscript 3
  118. ´
  119. &#180;
  120. &acute;
  121. spacing acute
  122. µ
  123. &#181;
  124. &micro;
  125. micro
  126. &#182;
  127. &para;
  128. paragraph
  129. ·
  130. &#183;
  131. &middot;
  132. middle dot
  133. ¸
  134. &#184;
  135. &cedil;
  136. spacing cedilla
  137. ¹
  138. &#185;
  139. &sup1;
  140. superscript 1
  141. º
  142. &#186;
  143. &ordm;
  144. masculine ordinal indicator
  145. »
  146. &#187;
  147. &raquo;
  148. angle quotation mark (right)
  149. ¼
  150. &#188;
  151. &frac14;
  152. fraction 1/4
  153. ½
  154. &#189;
  155. &frac12;
  156. fraction 1/2
  157. ¾
  158. &#190;
  159. &frac34;
  160. fraction 3/4
  161. ¿
  162. &#191;
  163. &iquest;
  164. inverted question mark
  165. ×
  166. &#215;
  167. &times;
  168. multiplication
  169. ÷
  170. &#247;
  171. &divide;
  172. division
  173. &#8704;
  174. &forall;
  175. for all
  176. &#8706;
  177. &part;
  178. part
  179. &#8719;
  180. &prod;
  181. prod
  182. &#8721;
  183. &sum;
  184. sum
  185. &#8722;
  186. &minus;
  187. minus
  188. &#8734;
  189. &infin;
  190. infinity
  191. &#8745;
  192. &cap;
  193. cap
  194. &#8747;
  195. &int;
  196. integral
  197. &#8776;
  198. &asymp;
  199. almost equal
  200. &#8800;
  201. &ne;
  202. not equal
  203. &#8801;
  204. &equiv;
  205. equivalent
  206. &#8804;
  207. &le;
  208. less or equal
  209. &#8805;
  210. &ge;
  211. greater or equal
  212. Α
  213. &#913;
  214. &Alpha;
  215. Alpha
  216. Β
  217. &#914;
  218. &Beta;
  219. Beta
  220. Γ
  221. &#915;
  222. &Gamma;
  223. Gamma
  224. Δ
  225. &#916;
  226. &Delta;
  227. Delta
  228. Ε
  229. &#917;
  230. &Epsilon;
  231. Epsilon
  232. Ζ
  233. &#918;
  234. &Zeta;
  235. Zeta
  236. Η
  237. &#919;
  238. &Eta;
  239. Eta
  240. Θ
  241. &#920;
  242. &Theta;
  243. Theta
  244. Ι
  245. &#921;
  246. &Iota;
  247. Iota
  248. Κ
  249. &#922;
  250. &Kappa;
  251. Kappa
  252. Λ
  253. &#923;
  254. &Lambda;
  255. Lambda
  256. Μ
  257. &#924;
  258. &Mu;
  259. Mu
  260. Ν
  261. &#925;
  262. &Nu;
  263. Nu
  264. Ξ
  265. &#926;
  266. &Xi;
  267. Xi
  268. Ο
  269. &#927;
  270. &Omicron;
  271. Omicron
  272. Π
  273. &#928;
  274. &Pi;
  275. Pi
  276. Ρ
  277. &#929;
  278. &Rho;
  279. Rho
  280. Σ
  281. &#931;
  282. &Sigma;
  283. Sigma
  284. Τ
  285. &#932;
  286. &Tau;
  287. Tau
  288. Υ
  289. &#933;
  290. &Upsilon;
  291. Upsilon
  292. Φ
  293. &#934;
  294. &Phi;
  295. Phi
  296. Χ
  297. &#935;
  298. &Chi;
  299. Chi
  300. Ψ
  301. &#936;
  302. &Psi;
  303. Psi
  304. Ω
  305. &#937;
  306. &Omega;
  307. Omega
  308.  
  309. α
  310. &#945;
  311. &alpha;
  312. alpha
  313. β
  314. &#946;
  315. &beta;
  316. beta
  317. γ
  318. &#947;
  319. &gamma;
  320. gamma
  321. δ
  322. &#948;
  323. &delta;
  324. delta
  325. ε
  326. &#949;
  327. &epsilon;
  328. epsilon
  329. ζ
  330. &#950;
  331. &zeta;
  332. zeta
  333. η
  334. &#951;
  335. &eta;
  336. eta
  337. θ
  338. &#952;
  339. &theta;
  340. theta
  341. ι
  342. &#953;
  343. &iota;
  344. iota
  345. κ
  346. &#954;
  347. &kappa;
  348. kappa
  349. λ
  350. &#955;
  351. &lambda;
  352. lambda
  353. μ
  354. &#956;
  355. &mu;
  356. mu
  357. ν
  358. &#957;
  359. &nu;
  360. nu
  361. ξ
  362. &#958;
  363. &xi;
  364. xi
  365. ο
  366. &#959;
  367. &omicron;
  368. omicron
  369. π
  370. &#960;
  371. &pi;
  372. pi
  373. ρ
  374. &#961;
  375. &rho;
  376. rho
  377. ς
  378. &#962;
  379. &sigmaf;
  380. sigmaf
  381. σ
  382. &#963;
  383. &sigma;
  384. sigma
  385. τ
  386. &#964;
  387. &tau;
  388. tau
  389. υ
  390. &#965;
  391. &upsilon;
  392. upsilon
  393. φ
  394. &#966;
  395. &phi;
  396. phi
  397. χ
  398. &#967;
  399. &chi;
  400. chi
  401. ψ
  402. &#968;
  403. &psi;
  404. psi
  405. ω
  406. &#969;
  407. &omega;
  408. omega
  409. Œ
  410. &#338;
  411. &OElig;
  412. capital ligature OE
  413. œ
  414. &#339;
  415. &oelig;
  416. small ligature oe
  417. Š
  418. &#352;
  419. &Scaron;
  420. capital S with caron
  421. š
  422. &#353;
  423. &scaron;
  424. small S with caron
  425. Ÿ
  426. &#376;
  427. &Yuml;
  428. capital Y with diaeresis
  429. ƒ
  430. &#402;
  431. &fnof;
  432. f with hook
  433. ˆ
  434. &#710;
  435. &circ;
  436. modifier letter circumflex accent
  437. ˜
  438. &#732;
  439. &tilde;
  440. small tilde
  441. &#8204;
  442. &zwnj;
  443. zero width non-joiner
  444. &#8205;
  445. &zwj;
  446. zero width joiner
  447. &#8206;
  448. &lrm;
  449. left-to-right mark
  450. &#8207;
  451. &rlm;
  452. right-to-left mark
  453. &#8211;
  454. &ndash;
  455. en dash
  456. &#8212;
  457. &mdash;
  458. em dash
  459. &#8216;
  460. &lsquo;
  461. left single quotation mark
  462. &#8217;
  463. &rsquo;
  464. right single quotation mark
  465. &#8218;
  466. &sbquo;
  467. single low-9 quotation mark
  468. &#8220;
  469. &ldquo;
  470. left double quotation mark
  471. &#8221;
  472. &rdquo;
  473. right double quotation mark
  474. &#8222;
  475. &bdquo;
  476. double low-9 quotation mark
  477. &#8224;
  478. &dagger;
  479. dagger
  480. &#8225;
  481. &Dagger;
  482. double dagger
  483. &#8226;
  484. &bull;
  485. bullet
  486. &#8230;
  487. &hellip;
  488. horizontal ellipsis
  489. &#8240;
  490. &permil;
  491. per mille 
  492. &#8242;
  493. &prime;
  494. minutes
  495. &#8243;
  496. &Prime;
  497. seconds
  498. &#8249;
  499. &lsaquo;
  500. single left angle quotation
  501. &#8250;
  502. &rsaquo;
  503. single right angle quotation
  504. &#8254;
  505. &oline;
  506. overline
  507. &#8364;
  508. &euro;
  509. euro
  510. &#8482;
  511. &trade;
  512. trademark
  513. &#8592;
  514. &larr;
  515. left arrow
  516. &#8593;
  517. &uarr;
  518. up arrow
  519. &#8594;
  520. &rarr;
  521. right arrow
  522. &#8595;
  523. &darr;
  524. down arrow
  525. &#8596;
  526. &harr;
  527. left right arrow
  528. &#9674;
  529. &loz;
  530. lozenge
  531. &#9824;
  532. &spades;
  533. spade
  534. &#9827;
  535. &clubs;
  536. club
  537. &#9829;
  538. &hearts;
  539. heart
  540. &#9830;
  541. &diams;
  542. diamond